001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one or more
003     *  contributor license agreements.  See the NOTICE file distributed with
004     *  this work for additional information regarding copyright ownership.
005     *  The ASF licenses this file to You under the Apache License, Version 2.0
006     *  (the "License"); you may not use this file except in compliance with
007     *  the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     *  Unless required by applicable law or agreed to in writing, software
012     *  distributed under the License is distributed on an "AS IS" BASIS,
013     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     *  See the License for the specific language governing permissions and
015     *  limitations under the License.
016     *
017     */
018    package org.apache.commons.compress.archivers.zip;
019    
020    import java.io.File;
021    import java.io.IOException;
022    import java.io.InputStream;
023    import java.io.RandomAccessFile;
024    import java.util.Collections;
025    import java.util.Enumeration;
026    import java.util.HashMap;
027    import java.util.Map;
028    import java.util.zip.Inflater;
029    import java.util.zip.InflaterInputStream;
030    import java.util.zip.ZipException;
031    
032    /**
033     * Replacement for <code>java.util.ZipFile</code>.
034     *
035     * <p>This class adds support for file name encodings other than UTF-8
036     * (which is required to work on ZIP files created by native zip tools
037     * and is able to skip a preamble like the one found in self
038     * extracting archives.  Furthermore it returns instances of
039     * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
040     * instead of <code>java.util.zip.ZipEntry</code>.</p>
041     *
042     * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
043     * have to reimplement all methods anyway.  Like
044     * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
045     * covers and supports compressed and uncompressed entries.</p>
046     *
047     * <p>The method signatures mimic the ones of
048     * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
049     *
050     * <ul>
051     *   <li>There is no getName method.</li>
052     *   <li>entries has been renamed to getEntries.</li>
053     *   <li>getEntries and getEntry return
054     *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
055     *   instances.</li>
056     *   <li>close is allowed to throw IOException.</li>
057     * </ul>
058     *
059     */
060    public class ZipFile {
061        private static final int HASH_SIZE = 509;
062        private static final int SHORT     =   2;
063        private static final int WORD      =   4;
064        static final int NIBLET_MASK = 0x0f;
065        static final int BYTE_SHIFT = 8;
066        private static final int POS_0 = 0;
067        private static final int POS_1 = 1;
068        private static final int POS_2 = 2;
069        private static final int POS_3 = 3;
070    
071        /**
072         * Maps ZipArchiveEntrys to Longs, recording the offsets of the local
073         * file headers.
074         */
075        private final Map entries = new HashMap(HASH_SIZE);
076    
077        /**
078         * Maps String to ZipArchiveEntrys, name -> actual entry.
079         */
080        private final Map nameMap = new HashMap(HASH_SIZE);
081    
082        private static final class OffsetEntry {
083            private long headerOffset = -1;
084            private long dataOffset = -1;
085        }
086    
087        /**
088         * The encoding to use for filenames and the file comment.
089         *
090         * <p>For a list of possible values see <a
091         * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
092         * Defaults to UTF-8.</p>
093         */
094        private final String encoding;
095    
096        /**
097         * The zip encoding to use for filenames and the file comment.
098         */
099        private final ZipEncoding zipEncoding;
100    
101        /**
102         * The actual data source.
103         */
104        private final RandomAccessFile archive;
105    
106        /**
107         * Whether to look for and use Unicode extra fields.
108         */
109        private final boolean useUnicodeExtraFields;
110    
111        /**
112         * Opens the given file for reading, assuming "UTF8" for file names.
113         *
114         * @param f the archive.
115         *
116         * @throws IOException if an error occurs while reading the file.
117         */
118        public ZipFile(File f) throws IOException {
119            this(f, ZipEncodingHelper.UTF8);
120        }
121    
122        /**
123         * Opens the given file for reading, assuming "UTF8".
124         *
125         * @param name name of the archive.
126         *
127         * @throws IOException if an error occurs while reading the file.
128         */
129        public ZipFile(String name) throws IOException {
130            this(new File(name), ZipEncodingHelper.UTF8);
131        }
132    
133        /**
134         * Opens the given file for reading, assuming the specified
135         * encoding for file names, scanning unicode extra fields.
136         *
137         * @param name name of the archive.
138         * @param encoding the encoding to use for file names, use null
139         * for the platform's default encoding
140         *
141         * @throws IOException if an error occurs while reading the file.
142         */
143        public ZipFile(String name, String encoding) throws IOException {
144            this(new File(name), encoding, true);
145        }
146    
147        /**
148         * Opens the given file for reading, assuming the specified
149         * encoding for file names and scanning for unicode extra fields.
150         *
151         * @param f the archive.
152         * @param encoding the encoding to use for file names, use null
153         * for the platform's default encoding
154         *
155         * @throws IOException if an error occurs while reading the file.
156         */
157        public ZipFile(File f, String encoding) throws IOException {
158            this(f, encoding, true);
159        }
160    
161        /**
162         * Opens the given file for reading, assuming the specified
163         * encoding for file names.
164         *
165         * @param f the archive.
166         * @param encoding the encoding to use for file names, use null
167         * for the platform's default encoding
168         * @param useUnicodeExtraFields whether to use InfoZIP Unicode
169         * Extra Fields (if present) to set the file names.
170         *
171         * @throws IOException if an error occurs while reading the file.
172         */
173        public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
174            throws IOException {
175            this.encoding = encoding;
176            this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
177            this.useUnicodeExtraFields = useUnicodeExtraFields;
178            archive = new RandomAccessFile(f, "r");
179            boolean success = false;
180            try {
181                Map entriesWithoutEFS = populateFromCentralDirectory();
182                resolveLocalFileHeaderData(entriesWithoutEFS);
183                success = true;
184            } finally {
185                if (!success) {
186                    try {
187                        archive.close();
188                    } catch (IOException e2) {
189                        // swallow, throw the original exception instead
190                    }
191                }
192            }
193        }
194    
195        /**
196         * The encoding to use for filenames and the file comment.
197         *
198         * @return null if using the platform's default character encoding.
199         */
200        public String getEncoding() {
201            return encoding;
202        }
203    
204        /**
205         * Closes the archive.
206         * @throws IOException if an error occurs closing the archive.
207         */
208        public void close() throws IOException {
209            archive.close();
210        }
211    
212        /**
213         * close a zipfile quietly; throw no io fault, do nothing
214         * on a null parameter
215         * @param zipfile file to close, can be null
216         */
217        public static void closeQuietly(ZipFile zipfile) {
218            if (zipfile != null) {
219                try {
220                    zipfile.close();
221                } catch (IOException e) {
222                    //ignore
223                }
224            }
225        }
226    
227        /**
228         * Returns all entries.
229         * @return all entries as {@link ZipArchiveEntry} instances
230         */
231        public Enumeration getEntries() {
232            return Collections.enumeration(entries.keySet());
233        }
234    
235        /**
236         * Returns a named entry - or <code>null</code> if no entry by
237         * that name exists.
238         * @param name name of the entry.
239         * @return the ZipArchiveEntry corresponding to the given name - or
240         * <code>null</code> if not present.
241         */
242        public ZipArchiveEntry getEntry(String name) {
243            return (ZipArchiveEntry) nameMap.get(name);
244        }
245    
246        /**
247         * Returns an InputStream for reading the contents of the given entry.
248         * @param ze the entry to get the stream for.
249         * @return a stream to read the entry from.
250         * @throws IOException if unable to create an input stream from the zipenty
251         * @throws ZipException if the zipentry has an unsupported
252         * compression method
253         */
254        public InputStream getInputStream(ZipArchiveEntry ze)
255            throws IOException, ZipException {
256            OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
257            if (offsetEntry == null) {
258                return null;
259            }
260            long start = offsetEntry.dataOffset;
261            BoundedInputStream bis =
262                new BoundedInputStream(start, ze.getCompressedSize());
263            switch (ze.getMethod()) {
264                case ZipArchiveEntry.STORED:
265                    return bis;
266                case ZipArchiveEntry.DEFLATED:
267                    bis.addDummy();
268                    return new InflaterInputStream(bis, new Inflater(true));
269                default:
270                    throw new ZipException("Found unsupported compression method "
271                                           + ze.getMethod());
272            }
273        }
274    
275        private static final int CFH_LEN =
276            /* version made by                 */ SHORT
277            /* version needed to extract       */ + SHORT
278            /* general purpose bit flag        */ + SHORT
279            /* compression method              */ + SHORT
280            /* last mod file time              */ + SHORT
281            /* last mod file date              */ + SHORT
282            /* crc-32                          */ + WORD
283            /* compressed size                 */ + WORD
284            /* uncompressed size               */ + WORD
285            /* filename length                 */ + SHORT
286            /* extra field length              */ + SHORT
287            /* file comment length             */ + SHORT
288            /* disk number start               */ + SHORT
289            /* internal file attributes        */ + SHORT
290            /* external file attributes        */ + WORD
291            /* relative offset of local header */ + WORD;
292    
293        /**
294         * Reads the central directory of the given archive and populates
295         * the internal tables with ZipArchiveEntry instances.
296         *
297         * <p>The ZipArchiveEntrys will know all data that can be obtained from
298         * the central directory alone, but not the data that requires the
299         * local file header or additional data to be read.</p>
300         *
301         * @return a Map&lt;ZipArchiveEntry, NameAndComment>&gt; of
302         * zipentries that didn't have the language encoding flag set when
303         * read.
304         */
305        private Map populateFromCentralDirectory()
306            throws IOException {
307            HashMap noEFS = new HashMap();
308    
309            positionAtCentralDirectory();
310    
311            byte[] cfh = new byte[CFH_LEN];
312    
313            byte[] signatureBytes = new byte[WORD];
314            archive.readFully(signatureBytes);
315            long sig = ZipLong.getValue(signatureBytes);
316            final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
317            if (sig != cfhSig && startsWithLocalFileHeader()) {
318                throw new IOException("central directory is empty, can't expand"
319                                      + " corrupt archive.");
320            }
321            while (sig == cfhSig) {
322                archive.readFully(cfh);
323                int off = 0;
324                ZipArchiveEntry ze = new ZipArchiveEntry();
325    
326                int versionMadeBy = ZipShort.getValue(cfh, off);
327                off += SHORT;
328                ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
329    
330                off += SHORT; // skip version info
331    
332                final int generalPurposeFlag = ZipShort.getValue(cfh, off);
333                final boolean hasEFS = 
334                    (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
335                final ZipEncoding entryEncoding =
336                    hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
337    
338                off += SHORT;
339    
340                ze.setMethod(ZipShort.getValue(cfh, off));
341                off += SHORT;
342    
343                // FIXME this is actually not very cpu cycles friendly as we are converting from
344                // dos to java while the underlying Sun implementation will convert
345                // from java to dos time for internal storage...
346                long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
347                ze.setTime(time);
348                off += WORD;
349    
350                ze.setCrc(ZipLong.getValue(cfh, off));
351                off += WORD;
352    
353                ze.setCompressedSize(ZipLong.getValue(cfh, off));
354                off += WORD;
355    
356                ze.setSize(ZipLong.getValue(cfh, off));
357                off += WORD;
358    
359                int fileNameLen = ZipShort.getValue(cfh, off);
360                off += SHORT;
361    
362                int extraLen = ZipShort.getValue(cfh, off);
363                off += SHORT;
364    
365                int commentLen = ZipShort.getValue(cfh, off);
366                off += SHORT;
367    
368                off += SHORT; // disk number
369    
370                ze.setInternalAttributes(ZipShort.getValue(cfh, off));
371                off += SHORT;
372    
373                ze.setExternalAttributes(ZipLong.getValue(cfh, off));
374                off += WORD;
375    
376                byte[] fileName = new byte[fileNameLen];
377                archive.readFully(fileName);
378                ze.setName(entryEncoding.decode(fileName));
379    
380                // LFH offset,
381                OffsetEntry offset = new OffsetEntry();
382                offset.headerOffset = ZipLong.getValue(cfh, off);
383                // data offset will be filled later
384                entries.put(ze, offset);
385    
386                nameMap.put(ze.getName(), ze);
387    
388                byte[] cdExtraData = new byte[extraLen];
389                archive.readFully(cdExtraData);
390                ze.setCentralDirectoryExtra(cdExtraData);
391    
392                byte[] comment = new byte[commentLen];
393                archive.readFully(comment);
394                ze.setComment(entryEncoding.decode(comment));
395    
396                archive.readFully(signatureBytes);
397                sig = ZipLong.getValue(signatureBytes);
398    
399                if (!hasEFS && useUnicodeExtraFields) {
400                    noEFS.put(ze, new NameAndComment(fileName, comment));
401                }
402            }
403            return noEFS;
404        }
405    
406        private static final int MIN_EOCD_SIZE =
407            /* end of central dir signature    */ WORD
408            /* number of this disk             */ + SHORT
409            /* number of the disk with the     */
410            /* start of the central directory  */ + SHORT
411            /* total number of entries in      */
412            /* the central dir on this disk    */ + SHORT
413            /* total number of entries in      */
414            /* the central dir                 */ + SHORT
415            /* size of the central directory   */ + WORD
416            /* offset of start of central      */
417            /* directory with respect to       */
418            /* the starting disk number        */ + WORD
419            /* zipfile comment length          */ + SHORT;
420    
421        private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
422            /* maximum length of zipfile comment */ + 0xFFFF;
423    
424        private static final int CFD_LOCATOR_OFFSET =
425            /* end of central dir signature    */ WORD
426            /* number of this disk             */ + SHORT
427            /* number of the disk with the     */
428            /* start of the central directory  */ + SHORT
429            /* total number of entries in      */
430            /* the central dir on this disk    */ + SHORT
431            /* total number of entries in      */
432            /* the central dir                 */ + SHORT
433            /* size of the central directory   */ + WORD;
434    
435        /**
436         * Searches for the &quot;End of central dir record&quot;, parses
437         * it and positions the stream at the first central directory
438         * record.
439         */
440        private void positionAtCentralDirectory()
441            throws IOException {
442            boolean found = false;
443            long off = archive.length() - MIN_EOCD_SIZE;
444            long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
445            if (off >= 0) {
446                archive.seek(off);
447                byte[] sig = ZipArchiveOutputStream.EOCD_SIG;
448                int curr = archive.read();
449                while (off >= stopSearching && curr != -1) {
450                    if (curr == sig[POS_0]) {
451                        curr = archive.read();
452                        if (curr == sig[POS_1]) {
453                            curr = archive.read();
454                            if (curr == sig[POS_2]) {
455                                curr = archive.read();
456                                if (curr == sig[POS_3]) {
457                                    found = true;
458                                    break;
459                                }
460                            }
461                        }
462                    }
463                    archive.seek(--off);
464                    curr = archive.read();
465                }
466            }
467            if (!found) {
468                throw new ZipException("archive is not a ZIP archive");
469            }
470            archive.seek(off + CFD_LOCATOR_OFFSET);
471            byte[] cfdOffset = new byte[WORD];
472            archive.readFully(cfdOffset);
473            archive.seek(ZipLong.getValue(cfdOffset));
474        }
475    
476        /**
477         * Number of bytes in local file header up to the &quot;length of
478         * filename&quot; entry.
479         */
480        private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
481            /* local file header signature     */ WORD
482            /* version needed to extract       */ + SHORT
483            /* general purpose bit flag        */ + SHORT
484            /* compression method              */ + SHORT
485            /* last mod file time              */ + SHORT
486            /* last mod file date              */ + SHORT
487            /* crc-32                          */ + WORD
488            /* compressed size                 */ + WORD
489            /* uncompressed size               */ + WORD;
490    
491        /**
492         * Walks through all recorded entries and adds the data available
493         * from the local file header.
494         *
495         * <p>Also records the offsets for the data to read from the
496         * entries.</p>
497         */
498        private void resolveLocalFileHeaderData(Map entriesWithoutEFS)
499            throws IOException {
500            Enumeration e = getEntries();
501            while (e.hasMoreElements()) {
502                ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement();
503                OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
504                long offset = offsetEntry.headerOffset;
505                archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
506                byte[] b = new byte[SHORT];
507                archive.readFully(b);
508                int fileNameLen = ZipShort.getValue(b);
509                archive.readFully(b);
510                int extraFieldLen = ZipShort.getValue(b);
511                int lenToSkip = fileNameLen;
512                while (lenToSkip > 0) {
513                    int skipped = archive.skipBytes(lenToSkip);
514                    if (skipped <= 0) {
515                        throw new RuntimeException("failed to skip file name in"
516                                                   + " local file header");
517                    }
518                    lenToSkip -= skipped;
519                }
520                byte[] localExtraData = new byte[extraFieldLen];
521                archive.readFully(localExtraData);
522                ze.setExtra(localExtraData);
523                /*dataOffsets.put(ze,
524                                new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
525                                         + SHORT + SHORT + fileNameLen + extraFieldLen));
526                */
527                offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
528                    + SHORT + SHORT + fileNameLen + extraFieldLen;
529    
530                if (entriesWithoutEFS.containsKey(ze)) {
531                    String orig = ze.getName();
532                    NameAndComment nc = (NameAndComment) entriesWithoutEFS.get(ze);
533                    ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
534                                                             nc.comment);
535                    if (!orig.equals(ze.getName())) {
536                        nameMap.remove(orig);
537                        nameMap.put(ze.getName(), ze);
538                    }
539                }
540            }
541        }
542    
543        /**
544         * Checks whether the archive starts with a LFH.  If it doesn't,
545         * it may be an empty archive.
546         */
547        private boolean startsWithLocalFileHeader() throws IOException {
548            archive.seek(0);
549            final byte[] start = new byte[WORD];
550            archive.readFully(start);
551            for (int i = 0; i < start.length; i++) {
552                if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
553                    return false;
554                }
555            }
556            return true;
557        }
558    
559        /**
560         * InputStream that delegates requests to the underlying
561         * RandomAccessFile, making sure that only bytes from a certain
562         * range can be read.
563         */
564        private class BoundedInputStream extends InputStream {
565            private long remaining;
566            private long loc;
567            private boolean addDummyByte = false;
568    
569            BoundedInputStream(long start, long remaining) {
570                this.remaining = remaining;
571                loc = start;
572            }
573    
574            public int read() throws IOException {
575                if (remaining-- <= 0) {
576                    if (addDummyByte) {
577                        addDummyByte = false;
578                        return 0;
579                    }
580                    return -1;
581                }
582                synchronized (archive) {
583                    archive.seek(loc++);
584                    return archive.read();
585                }
586            }
587    
588            public int read(byte[] b, int off, int len) throws IOException {
589                if (remaining <= 0) {
590                    if (addDummyByte) {
591                        addDummyByte = false;
592                        b[off] = 0;
593                        return 1;
594                    }
595                    return -1;
596                }
597    
598                if (len <= 0) {
599                    return 0;
600                }
601    
602                if (len > remaining) {
603                    len = (int) remaining;
604                }
605                int ret = -1;
606                synchronized (archive) {
607                    archive.seek(loc);
608                    ret = archive.read(b, off, len);
609                }
610                if (ret > 0) {
611                    loc += ret;
612                    remaining -= ret;
613                }
614                return ret;
615            }
616    
617            /**
618             * Inflater needs an extra dummy byte for nowrap - see
619             * Inflater's javadocs.
620             */
621            void addDummy() {
622                addDummyByte = true;
623            }
624        }
625    
626        private static final class NameAndComment {
627            private final byte[] name;
628            private final byte[] comment;
629            private NameAndComment(byte[] name, byte[] comment) {
630                this.name = name;
631                this.comment = comment;
632            }
633        }
634    }