001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.z;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.nio.ByteOrder;
024
025import org.apache.commons.compress.compressors.lzw.LZWInputStream;
026
027/**
028 * Input stream that decompresses .Z files.
029 * @NotThreadSafe
030 * @since 1.7
031 */
032public class ZCompressorInputStream extends LZWInputStream {
033    private static final int MAGIC_1 = 0x1f;
034    private static final int MAGIC_2 = 0x9d;
035    private static final int BLOCK_MODE_MASK = 0x80;
036    private static final int MAX_CODE_SIZE_MASK = 0x1f;
037    private final boolean blockMode;
038    private final int maxCodeSize;
039    private long totalCodesRead = 0;
040    
041    public ZCompressorInputStream(InputStream inputStream) throws IOException {
042        super(inputStream, ByteOrder.LITTLE_ENDIAN);
043        int firstByte = (int) in.readBits(8);
044        int secondByte = (int) in.readBits(8);
045        int thirdByte = (int) in.readBits(8);
046        if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
047            throw new IOException("Input is not in .Z format");
048        }
049        blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
050        maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
051        if (blockMode) {
052            setClearCode(codeSize);
053        }
054        initializeTables(maxCodeSize);
055        clearEntries();
056    }
057    
058    private void clearEntries() {
059        tableSize = 1 << 8;
060        if (blockMode) {
061            tableSize++;
062        }
063    }
064
065    /**
066     * {@inheritDoc}
067     * <p><strong>This method is only protected for technical reasons
068     * and is not part of Commons Compress' published API.  It may
069     * change or disappear without warning.</strong></p>
070     */
071    @Override
072    protected int readNextCode() throws IOException {
073        int code = super.readNextCode();
074        if (code >= 0) {
075            ++totalCodesRead;
076        }
077        return code;
078    }
079    
080    private void reAlignReading() throws IOException {
081        // "compress" works in multiples of 8 symbols, each codeBits bits long.
082        // When codeBits changes, the remaining unused symbols in the current
083        // group of 8 are still written out, in the old codeSize,
084        // as garbage values (usually zeroes) that need to be skipped.
085        long codeReadsToThrowAway = 8 - (totalCodesRead % 8);
086        if (codeReadsToThrowAway == 8) {
087            codeReadsToThrowAway = 0;
088        }
089        for (long i = 0; i < codeReadsToThrowAway; i++) {
090            readNextCode();
091        }
092        in.clearBitCache();
093    }
094    
095    /**
096     * {@inheritDoc}
097     * <p><strong>This method is only protected for technical reasons
098     * and is not part of Commons Compress' published API.  It may
099     * change or disappear without warning.</strong></p>
100     */
101    @Override
102    protected int addEntry(int previousCode, byte character) throws IOException {
103        final int maxTableSize = 1 << codeSize;
104        int r = addEntry(previousCode, character, maxTableSize);
105        if (tableSize == maxTableSize && codeSize < maxCodeSize) {
106            reAlignReading();
107            codeSize++;
108        }
109        return r;
110    }
111
112    /**
113     * {@inheritDoc}
114     * <p><strong>This method is only protected for technical reasons
115     * and is not part of Commons Compress' published API.  It may
116     * change or disappear without warning.</strong></p>
117     */
118    @Override
119    protected int decompressNextSymbol() throws IOException {
120        //
121        //                   table entry    table entry
122        //                  _____________   _____
123        //    table entry  /             \ /     \
124        //    ____________/               \       \
125        //   /           / \             / \       \
126        //  +---+---+---+---+---+---+---+---+---+---+
127        //  | . | . | . | . | . | . | . | . | . | . |
128        //  +---+---+---+---+---+---+---+---+---+---+
129        //  |<--------->|<------------->|<----->|<->|
130        //     symbol        symbol      symbol  symbol
131        //
132        final int code = readNextCode();
133        if (code < 0) {
134            return -1;
135        } else if (blockMode && code == clearCode) {
136            clearEntries();
137            reAlignReading();
138            codeSize = 9;
139            previousCode = -1;
140            return 0;
141        } else {
142            boolean addedUnfinishedEntry = false;
143            if (code == tableSize) {
144                addRepeatOfPreviousCode();
145                addedUnfinishedEntry = true;
146            } else if (code > tableSize) {
147                throw new IOException(String.format("Invalid %d bit code 0x%x", codeSize, code));
148            }
149            return expandCodeToOutputStack(code, addedUnfinishedEntry);
150        }
151    }
152    
153    /**
154     * Checks if the signature matches what is expected for a Unix compress file.
155     * 
156     * @param signature
157     *            the bytes to check
158     * @param length
159     *            the number of bytes to check
160     * @return true, if this stream is a Unix compress compressed
161     * stream, false otherwise
162     * 
163     * @since 1.9
164     */
165    public static boolean matches(byte[] signature, int length) {
166        return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
167    }
168
169}