001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     * http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.commons.compress.archivers.zip;
020    
021    import java.io.EOFException;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.PushbackInputStream;
025    import java.util.zip.CRC32;
026    import java.util.zip.DataFormatException;
027    import java.util.zip.Inflater;
028    import java.util.zip.ZipException;
029    
030    import org.apache.commons.compress.archivers.ArchiveEntry;
031    import org.apache.commons.compress.archivers.ArchiveInputStream;
032    
033    /**
034     * Implements an input stream that can read Zip archives.
035     * <p>
036     * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
037     * is not available from the header.
038     * <p>
039     * The {@link ZipFile} class is preferred when reading from files.
040     *  
041     * @see ZipFile
042     * @NotThreadSafe
043     */
044    public class ZipArchiveInputStream extends ArchiveInputStream {
045    
046        private static final int SHORT = 2;
047        private static final int WORD = 4;
048    
049        /**
050         * The zip encoding to use for filenames and the file comment.
051         */
052        private final ZipEncoding zipEncoding;
053    
054        /**
055         * Whether to look for and use Unicode extra fields.
056         */
057        private final boolean useUnicodeExtraFields;
058    
059        private final InputStream in;
060    
061        private final Inflater inf = new Inflater(true);
062        private final CRC32 crc = new CRC32();
063    
064        private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
065    
066        private ZipArchiveEntry current = null;
067        private boolean closed = false;
068        private boolean hitCentralDirectory = false;
069        private int readBytesOfEntry = 0, offsetInBuffer = 0;
070        private int bytesReadFromStream = 0;
071        private int lengthOfLastRead = 0;
072        private boolean hasDataDescriptor = false;
073    
074        private static final int LFH_LEN = 30;
075        /*
076          local file header signature     4 bytes  (0x04034b50)
077          version needed to extract       2 bytes
078          general purpose bit flag        2 bytes
079          compression method              2 bytes
080          last mod file time              2 bytes
081          last mod file date              2 bytes
082          crc-32                          4 bytes
083          compressed size                 4 bytes
084          uncompressed size               4 bytes
085          file name length                2 bytes
086          extra field length              2 bytes
087        */
088    
089        public ZipArchiveInputStream(InputStream inputStream) {
090            this(inputStream, ZipEncodingHelper.UTF8, true);
091        }
092    
093        /**
094         * @param encoding the encoding to use for file names, use null
095         * for the platform's default encoding
096         * @param useUnicodeExtraFields whether to use InfoZIP Unicode
097         * Extra Fields (if present) to set the file names.
098         */
099        public ZipArchiveInputStream(InputStream inputStream,
100                                     String encoding,
101                                     boolean useUnicodeExtraFields) {
102            zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
103            this.useUnicodeExtraFields = useUnicodeExtraFields;
104            in = new PushbackInputStream(inputStream, buf.length);
105        }
106    
107        public ZipArchiveEntry getNextZipEntry() throws IOException {
108            if (closed || hitCentralDirectory) {
109                return null;
110            }
111            if (current != null) {
112                closeEntry();
113            }
114            byte[] lfh = new byte[LFH_LEN];
115            try {
116                readFully(lfh);
117            } catch (EOFException e) {
118                return null;
119            }
120            ZipLong sig = new ZipLong(lfh);
121            if (sig.equals(ZipLong.CFH_SIG)) {
122                hitCentralDirectory = true;
123                return null;
124            }
125            if (!sig.equals(ZipLong.LFH_SIG)) {
126                return null;
127            }
128    
129            int off = WORD;
130            current = new ZipArchiveEntry();
131    
132            int versionMadeBy = ZipShort.getValue(lfh, off);
133            off += SHORT;
134            current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
135                                & ZipFile.NIBLET_MASK);
136    
137            final int generalPurposeFlag = ZipShort.getValue(lfh, off);
138            final boolean hasEFS = 
139                (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
140            final ZipEncoding entryEncoding =
141                hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
142            hasDataDescriptor = (generalPurposeFlag & 8) != 0;
143    
144            off += SHORT;
145    
146            current.setMethod(ZipShort.getValue(lfh, off));
147            off += SHORT;
148    
149            long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
150            current.setTime(time);
151            off += WORD;
152    
153            if (!hasDataDescriptor) {
154                current.setCrc(ZipLong.getValue(lfh, off));
155                off += WORD;
156    
157                current.setCompressedSize(ZipLong.getValue(lfh, off));
158                off += WORD;
159    
160                current.setSize(ZipLong.getValue(lfh, off));
161                off += WORD;
162            } else {
163                off += 3 * WORD;
164            }
165    
166            int fileNameLen = ZipShort.getValue(lfh, off);
167    
168            off += SHORT;
169    
170            int extraLen = ZipShort.getValue(lfh, off);
171            off += SHORT;
172    
173            byte[] fileName = new byte[fileNameLen];
174            readFully(fileName);
175            current.setName(entryEncoding.decode(fileName));
176    
177            byte[] extraData = new byte[extraLen];
178            readFully(extraData);
179            current.setExtra(extraData);
180    
181            if (!hasEFS && useUnicodeExtraFields) {
182                ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
183            }
184            return current;
185        }
186    
187        public ArchiveEntry getNextEntry() throws IOException {
188            return getNextZipEntry();
189        }
190    
191        public int read(byte[] buffer, int start, int length) throws IOException {
192            if (closed) {
193                throw new IOException("The stream is closed");
194            }
195            if (inf.finished() || current == null) {
196                return -1;
197            }
198    
199            // avoid int overflow, check null buffer
200            if (start <= buffer.length && length >= 0 && start >= 0
201                && buffer.length - start >= length) {
202                if (current.getMethod() == ZipArchiveOutputStream.STORED) {
203                    int csize = (int) current.getSize();
204                    if (readBytesOfEntry >= csize) {
205                        return -1;
206                    }
207                    if (offsetInBuffer >= lengthOfLastRead) {
208                        offsetInBuffer = 0;
209                        if ((lengthOfLastRead = in.read(buf)) == -1) {
210                            return -1;
211                        }
212                        count(lengthOfLastRead);
213                        bytesReadFromStream += lengthOfLastRead;
214                    }
215                    int toRead = length > lengthOfLastRead
216                        ? lengthOfLastRead - offsetInBuffer
217                        : length;
218                    if ((csize - readBytesOfEntry) < toRead) {
219                        toRead = csize - readBytesOfEntry;
220                    }
221                    System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
222                    offsetInBuffer += toRead;
223                    readBytesOfEntry += toRead;
224                    crc.update(buffer, start, toRead);
225                    return toRead;
226                }
227                if (inf.needsInput()) {
228                    fill();
229                    if (lengthOfLastRead > 0) {
230                        bytesReadFromStream += lengthOfLastRead;
231                    }
232                }
233                int read = 0;
234                try {
235                    read = inf.inflate(buffer, start, length);
236                } catch (DataFormatException e) {
237                    throw new ZipException(e.getMessage());
238                }
239                if (read == 0 && inf.finished()) {
240                    return -1;
241                }
242                crc.update(buffer, start, read);
243                return read;
244            }
245            throw new ArrayIndexOutOfBoundsException();
246        }
247    
248        public void close() throws IOException {
249            if (!closed) {
250                closed = true;
251                in.close();
252            }
253        }
254    
255        public long skip(long value) throws IOException {
256            if (value >= 0) {
257                long skipped = 0;
258                byte[] b = new byte[1024];
259                while (skipped != value) {
260                    long rem = value - skipped;
261                    int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
262                    if (x == -1) {
263                        return skipped;
264                    }
265                    skipped += x;
266                }
267                return skipped;
268            }
269            throw new IllegalArgumentException();
270        }
271    
272        /*
273         *  This test assumes that the zip file does not have any additional leading content,
274         *  which is something that is allowed by the specification (e.g. self-extracting zips)
275         */
276        public static boolean matches(byte[] signature, int length) {
277            if (length < ZipArchiveOutputStream.LFH_SIG.length) {
278                return false;
279            }
280    
281            return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
282                || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
283        }
284    
285        private static boolean checksig(byte[] signature, byte[] expected){
286            for (int i = 0; i < expected.length; i++) {
287                if (signature[i] != expected[i]) {
288                    return false;
289                }
290            }
291            return true;        
292        }
293    
294        private void closeEntry() throws IOException {
295            if (closed) {
296                throw new IOException("The stream is closed");
297            }
298            if (current == null) {
299                return;
300            }
301            // Ensure all entry bytes are read
302            skip(Long.MAX_VALUE);
303            int inB;
304            if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
305                inB = inf.getTotalIn();
306            } else {
307                inB = readBytesOfEntry;
308            }
309            int diff = 0;
310    
311            // Pushback any required bytes
312            if ((diff = bytesReadFromStream - inB) != 0) {
313                ((PushbackInputStream) in).unread(buf,
314                                                  lengthOfLastRead - diff, diff);
315            }
316    
317            if (hasDataDescriptor) {
318                readFully(new byte[4 * WORD]);
319            }
320    
321            inf.reset();
322            readBytesOfEntry = offsetInBuffer = bytesReadFromStream =
323                lengthOfLastRead = 0;
324            crc.reset();
325            current = null;
326        }
327    
328        private void fill() throws IOException {
329            if (closed) {
330                throw new IOException("The stream is closed");
331            }
332            if ((lengthOfLastRead = in.read(buf)) > 0) {
333                inf.setInput(buf, 0, lengthOfLastRead);
334            }
335        }
336    
337        private void readFully(byte[] b) throws IOException {
338            int count = 0, x = 0;
339            while (count != b.length) {
340                count += x = in.read(b, count, b.length - count);
341                if (x == -1) {
342                    throw new EOFException();
343                }
344            }
345        }
346    }