001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018package org.apache.commons.compress.archivers.zip;
019
020import java.io.BufferedInputStream;
021import java.io.ByteArrayInputStream;
022import java.io.Closeable;
023import java.io.EOFException;
024import java.io.File;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.SequenceInputStream;
028import java.nio.ByteBuffer;
029import java.nio.channels.FileChannel;
030import java.nio.channels.SeekableByteChannel;
031import java.nio.file.Files;
032import java.nio.file.StandardOpenOption;
033import java.util.Arrays;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.Enumeration;
037import java.util.EnumSet;
038import java.util.HashMap;
039import java.util.LinkedList;
040import java.util.List;
041import java.util.Map;
042import java.util.zip.Inflater;
043import java.util.zip.InflaterInputStream;
044import java.util.zip.ZipException;
045
046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
048import org.apache.commons.compress.utils.IOUtils;
049
050import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
051import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
052import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
053import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
054import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
055
056/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
058 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self
 * extracting archives.  Furthermore it returns instances of
 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 * instead of <code>java.util.zip.ZipEntry</code>.</p>
065 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway.  Like
 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the
 * covers and supports compressed and uncompressed entries.  As of
 * Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
073 *
074 * <p>The method signatures mimic the ones of
075 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
076 *
077 * <ul>
078 *   <li>There is no getName method.</li>
079 *   <li>entries has been renamed to getEntries.</li>
080 *   <li>getEntries and getEntry return
081 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
082 *   instances.</li>
083 *   <li>close is allowed to throw IOException.</li>
084 * </ul>
085 *
086 */
087public class ZipFile implements Closeable {
088    private static final int HASH_SIZE = 509;
089    static final int NIBLET_MASK = 0x0f;
090    static final int BYTE_SHIFT = 8;
091    private static final int POS_0 = 0;
092    private static final int POS_1 = 1;
093    private static final int POS_2 = 2;
094    private static final int POS_3 = 3;
095    private static final byte[] ONE_ZERO_BYTE = new byte[1];
096
097    /**
098     * List of entries in the order they appear inside the central
099     * directory.
100     */
101    private final List<ZipArchiveEntry> entries =
102        new LinkedList<>();
103
104    /**
105     * Maps String to list of ZipArchiveEntrys, name -> actual entries.
106     */
107    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
108        new HashMap<>(HASH_SIZE);
109
110    /**
111     * The encoding to use for filenames and the file comment.
112     *
113     * <p>For a list of possible values see <a
114     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
115     * Defaults to UTF-8.</p>
116     */
117    private final String encoding;
118
119    /**
120     * The zip encoding to use for filenames and the file comment.
121     */
122    private final ZipEncoding zipEncoding;
123
124    /**
125     * File name of actual source.
126     */
127    private final String archiveName;
128
129    /**
130     * The actual data source.
131     */
132    private final SeekableByteChannel archive;
133
134    /**
135     * Whether to look for and use Unicode extra fields.
136     */
137    private final boolean useUnicodeExtraFields;
138
139    /**
140     * Whether the file is closed.
141     */
142    private volatile boolean closed = true;
143
144    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
145    private final byte[] dwordBuf = new byte[DWORD];
146    private final byte[] wordBuf = new byte[WORD];
147    private final byte[] cfhBuf = new byte[CFH_LEN];
148    private final byte[] shortBuf = new byte[SHORT];
149    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
150    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
151    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
152
153    /**
154     * Opens the given file for reading, assuming "UTF8" for file names.
155     *
156     * @param f the archive.
157     *
158     * @throws IOException if an error occurs while reading the file.
159     */
160    public ZipFile(final File f) throws IOException {
161        this(f, ZipEncodingHelper.UTF8);
162    }
163
164    /**
165     * Opens the given file for reading, assuming "UTF8".
166     *
167     * @param name name of the archive.
168     *
169     * @throws IOException if an error occurs while reading the file.
170     */
171    public ZipFile(final String name) throws IOException {
172        this(new File(name), ZipEncodingHelper.UTF8);
173    }
174
175    /**
176     * Opens the given file for reading, assuming the specified
177     * encoding for file names, scanning unicode extra fields.
178     *
179     * @param name name of the archive.
180     * @param encoding the encoding to use for file names, use null
181     * for the platform's default encoding
182     *
183     * @throws IOException if an error occurs while reading the file.
184     */
185    public ZipFile(final String name, final String encoding) throws IOException {
186        this(new File(name), encoding, true);
187    }
188
189    /**
190     * Opens the given file for reading, assuming the specified
191     * encoding for file names and scanning for unicode extra fields.
192     *
193     * @param f the archive.
194     * @param encoding the encoding to use for file names, use null
195     * for the platform's default encoding
196     *
197     * @throws IOException if an error occurs while reading the file.
198     */
199    public ZipFile(final File f, final String encoding) throws IOException {
200        this(f, encoding, true);
201    }
202
203    /**
204     * Opens the given file for reading, assuming the specified
205     * encoding for file names.
206     *
207     * @param f the archive.
208     * @param encoding the encoding to use for file names, use null
209     * for the platform's default encoding
210     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
211     * Extra Fields (if present) to set the file names.
212     *
213     * @throws IOException if an error occurs while reading the file.
214     */
215    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
216        throws IOException {
217        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
218             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
219    }
220
221    /**
222     * Opens the given channel for reading, assuming "UTF8" for file names.
223     *
224     * <p>{@link
225     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
226     * allows you to read from an in-memory archive.</p>
227     *
228     * @param channel the archive.
229     *
230     * @throws IOException if an error occurs while reading the file.
231     * @since 1.13
232     */
233    public ZipFile(final SeekableByteChannel channel)
234            throws IOException {
235        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
236    }
237
238    /**
239     * Opens the given channel for reading, assuming the specified
240     * encoding for file names.
241     *
242     * <p>{@link
243     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
244     * allows you to read from an in-memory archive.</p>
245     *
246     * @param channel the archive.
247     * @param encoding the encoding to use for file names, use null
248     * for the platform's default encoding
249     *
250     * @throws IOException if an error occurs while reading the file.
251     * @since 1.13
252     */
253    public ZipFile(final SeekableByteChannel channel, final String encoding)
254        throws IOException {
255        this(channel, "unknown archive", encoding, true);
256    }
257
258    /**
259     * Opens the given channel for reading, assuming the specified
260     * encoding for file names.
261     *
262     * <p>{@link
263     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
264     * allows you to read from an in-memory archive.</p>
265     *
266     * @param channel the archive.
267     * @param archiveName name of the archive, used for error messages only.
268     * @param encoding the encoding to use for file names, use null
269     * for the platform's default encoding
270     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
271     * Extra Fields (if present) to set the file names.
272     *
273     * @throws IOException if an error occurs while reading the file.
274     * @since 1.13
275     */
276    public ZipFile(final SeekableByteChannel channel, final String archiveName,
277                   final String encoding, final boolean useUnicodeExtraFields)
278        throws IOException {
279        this(channel, archiveName, encoding, useUnicodeExtraFields, false);
280    }
281
282    private ZipFile(final SeekableByteChannel channel, final String archiveName,
283                    final String encoding, final boolean useUnicodeExtraFields,
284                    final boolean closeOnError)
285        throws IOException {
286        this.archiveName = archiveName;
287        this.encoding = encoding;
288        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
289        this.useUnicodeExtraFields = useUnicodeExtraFields;
290        archive = channel;
291        boolean success = false;
292        try {
293            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
294                populateFromCentralDirectory();
295            resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
296            success = true;
297        } finally {
298            closed = !success;
299            if (!success && closeOnError) {
300                IOUtils.closeQuietly(archive);
301            }
302        }
303    }
304
305    /**
306     * The encoding to use for filenames and the file comment.
307     *
308     * @return null if using the platform's default character encoding.
309     */
310    public String getEncoding() {
311        return encoding;
312    }
313
314    /**
315     * Closes the archive.
316     * @throws IOException if an error occurs closing the archive.
317     */
318    @Override
319    public void close() throws IOException {
320        // this flag is only written here and read in finalize() which
321        // can never be run in parallel.
322        // no synchronization needed.
323        closed = true;
324
325        archive.close();
326    }
327
328    /**
329     * close a zipfile quietly; throw no io fault, do nothing
330     * on a null parameter
331     * @param zipfile file to close, can be null
332     */
333    public static void closeQuietly(final ZipFile zipfile) {
334        IOUtils.closeQuietly(zipfile);
335    }
336
337    /**
338     * Returns all entries.
339     *
340     * <p>Entries will be returned in the same order they appear
341     * within the archive's central directory.</p>
342     *
343     * @return all entries as {@link ZipArchiveEntry} instances
344     */
345    public Enumeration<ZipArchiveEntry> getEntries() {
346        return Collections.enumeration(entries);
347    }
348
349    /**
350     * Returns all entries in physical order.
351     *
352     * <p>Entries will be returned in the same order their contents
353     * appear within the archive.</p>
354     *
355     * @return all entries as {@link ZipArchiveEntry} instances
356     *
357     * @since 1.1
358     */
359    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() {
360        final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
361        Arrays.sort(allEntries, offsetComparator);
362        return Collections.enumeration(Arrays.asList(allEntries));
363    }
364
365    /**
366     * Returns a named entry - or {@code null} if no entry by
367     * that name exists.
368     *
369     * <p>If multiple entries with the same name exist the first entry
370     * in the archive's central directory by that name is
371     * returned.</p>
372     *
373     * @param name name of the entry.
374     * @return the ZipArchiveEntry corresponding to the given name - or
375     * {@code null} if not present.
376     */
377    public ZipArchiveEntry getEntry(final String name) {
378        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
379        return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
380    }
381
382    /**
383     * Returns all named entries in the same order they appear within
384     * the archive's central directory.
385     *
386     * @param name name of the entry.
387     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
388     * given name
389     * @since 1.6
390     */
391    public Iterable<ZipArchiveEntry> getEntries(final String name) {
392        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
393        return entriesOfThatName != null ? entriesOfThatName
394            : Collections.<ZipArchiveEntry>emptyList();
395    }
396
397    /**
398     * Returns all named entries in the same order their contents
399     * appear within the archive.
400     *
401     * @param name name of the entry.
402     * @return the Iterable&lt;ZipArchiveEntry&gt; corresponding to the
403     * given name
404     * @since 1.6
405     */
406    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) {
407        ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
408        if (nameMap.containsKey(name)) {
409            entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
410            Arrays.sort(entriesOfThatName, offsetComparator);
411        }
412        return Arrays.asList(entriesOfThatName);
413    }
414
415    /**
416     * Whether this class is able to read the given entry.
417     *
418     * <p>May return false if it is set up to use encryption or a
419     * compression method that hasn't been implemented yet.</p>
420     * @since 1.1
421     * @param ze the entry
422     * @return whether this class is able to read the given entry.
423     */
424    public boolean canReadEntryData(final ZipArchiveEntry ze) {
425        return ZipUtil.canHandleEntryData(ze);
426    }
427
428    /**
429     * Expose the raw stream of the archive entry (compressed form).
430     *
431     * <p>This method does not relate to how/if we understand the payload in the
432     * stream, since we really only intend to move it on to somewhere else.</p>
433     *
434     * @param ze The entry to get the stream for
435     * @return The raw input stream containing (possibly) compressed data.
436     * @since 1.11
437     */
438    public InputStream getRawInputStream(final ZipArchiveEntry ze) {
439        if (!(ze instanceof Entry)) {
440            return null;
441        }
442        final long start = ze.getDataOffset();
443        return createBoundedInputStream(start, ze.getCompressedSize());
444    }
445
446
447    /**
448     * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream.
449     * Compression and all other attributes will be as in this file.
450     * <p>This method transfers entries based on the central directory of the zip file.</p>
451     *
452     * @param target The zipArchiveOutputStream to write the entries to
453     * @param predicate A predicate that selects which entries to write
454     * @throws IOException on error
455     */
456    public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate)
457            throws IOException {
458        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
459        while (src.hasMoreElements()) {
460            final ZipArchiveEntry entry = src.nextElement();
461            if (predicate.test( entry)) {
462                target.addRawArchiveEntry(entry, getRawInputStream(entry));
463            }
464        }
465    }
466
467    /**
468     * Returns an InputStream for reading the contents of the given entry.
469     *
470     * @param ze the entry to get the stream for.
471     * @return a stream to read the entry from.
472     * @throws IOException if unable to create an input stream from the zipentry
473     */
474    public InputStream getInputStream(final ZipArchiveEntry ze)
475        throws IOException {
476        if (!(ze instanceof Entry)) {
477            return null;
478        }
479        // cast validity is checked just above
480        ZipUtil.checkRequestedFeatures(ze);
481        final long start = ze.getDataOffset();
482
483        // doesn't get closed if the method is not supported - which
484        // should never happen because of the checkRequestedFeatures
485        // call above
486        final InputStream is =
487            new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
488        switch (ZipMethod.getMethodByCode(ze.getMethod())) {
489            case STORED:
490                return is;
491            case UNSHRINKING:
492                return new UnshrinkingInputStream(is);
493            case IMPLODING:
494                return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(),
495                        ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is);
496            case DEFLATED:
497                final Inflater inflater = new Inflater(true);
498                // Inflater with nowrap=true has this odd contract for a zero padding
499                // byte following the data stream; this used to be zlib's requirement
500                // and has been fixed a long time ago, but the contract persists so
501                // we comply.
502                // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
503                return new InflaterInputStream(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
504                    inflater) {
505                    @Override
506                    public void close() throws IOException {
507                        try {
508                            super.close();
509                        } finally {
510                            inflater.end();
511                        }
512                    }
513                };
514            case BZIP2:
515                return new BZip2CompressorInputStream(is);
516            case ENHANCED_DEFLATED:
517                return new Deflate64CompressorInputStream(is);
518            case AES_ENCRYPTED:
519            case EXPANDING_LEVEL_1:
520            case EXPANDING_LEVEL_2:
521            case EXPANDING_LEVEL_3:
522            case EXPANDING_LEVEL_4:
523            case JPEG:
524            case LZMA:
525            case PKWARE_IMPLODING:
526            case PPMD:
527            case TOKENIZATION:
528            case UNKNOWN:
529            case WAVPACK:
530            case XZ:
531            default:
532                throw new ZipException("Found unsupported compression method "
533                                       + ze.getMethod());
534        }
535    }
536
537    /**
538     * <p>
539     * Convenience method to return the entry's content as a String if isUnixSymlink()
540     * returns true for it, otherwise returns null.
541     * </p>
542     *
543     * <p>This method assumes the symbolic link's file name uses the
544     * same encoding that as been specified for this ZipFile.</p>
545     *
546     * @param entry ZipArchiveEntry object that represents the symbolic link
547     * @return entry's content as a String
548     * @throws IOException problem with content's input stream
549     * @since 1.5
550     */
551    public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException {
552        if (entry != null && entry.isUnixSymlink()) {
553            try (InputStream in = getInputStream(entry)) {
554                return zipEncoding.decode(IOUtils.toByteArray(in));
555            }
556        }
557        return null;
558    }
559
560    /**
561     * Ensures that the close method of this zipfile is called when
562     * there are no more references to it.
563     * @see #close()
564     */
565    @Override
566    protected void finalize() throws Throwable {
567        try {
568            if (!closed) {
569                System.err.println("Cleaning up unclosed ZipFile for archive "
570                                   + archiveName);
571                close();
572            }
573        } finally {
574            super.finalize();
575        }
576    }
577
578    /**
579     * Length of a "central directory" entry structure without file
580     * name, extra fields or comment.
581     */
582    private static final int CFH_LEN =
583        /* version made by                 */ SHORT
584        /* version needed to extract       */ + SHORT
585        /* general purpose bit flag        */ + SHORT
586        /* compression method              */ + SHORT
587        /* last mod file time              */ + SHORT
588        /* last mod file date              */ + SHORT
589        /* crc-32                          */ + WORD
590        /* compressed size                 */ + WORD
591        /* uncompressed size               */ + WORD
592        /* filename length                 */ + SHORT
593        /* extra field length              */ + SHORT
594        /* file comment length             */ + SHORT
595        /* disk number start               */ + SHORT
596        /* internal file attributes        */ + SHORT
597        /* external file attributes        */ + WORD
598        /* relative offset of local header */ + WORD;
599
600    private static final long CFH_SIG =
601        ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
602
603    /**
604     * Reads the central directory of the given archive and populates
605     * the internal tables with ZipArchiveEntry instances.
606     *
607     * <p>The ZipArchiveEntrys will know all data that can be obtained from
608     * the central directory alone, but not the data that requires the
609     * local file header or additional data to be read.</p>
610     *
611     * @return a map of zipentries that didn't have the language
612     * encoding flag set when read.
613     */
614    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
615        throws IOException {
616        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
617            new HashMap<>();
618
619        positionAtCentralDirectory();
620
621        wordBbuf.rewind();
622        IOUtils.readFully(archive, wordBbuf);
623        long sig = ZipLong.getValue(wordBuf);
624
625        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
626            throw new IOException("central directory is empty, can't expand"
627                                  + " corrupt archive.");
628        }
629
630        while (sig == CFH_SIG) {
631            readCentralDirectoryEntry(noUTF8Flag);
632            wordBbuf.rewind();
633            IOUtils.readFully(archive, wordBbuf);
634            sig = ZipLong.getValue(wordBuf);
635        }
636        return noUTF8Flag;
637    }
638
639    /**
640     * Reads an individual entry of the central directory, creats an
641     * ZipArchiveEntry from it and adds it to the global maps.
642     *
643     * @param noUTF8Flag map used to collect entries that don't have
644     * their UTF-8 flag set and whose name will be set by data read
645     * from the local file header later.  The current entry may be
646     * added to this map.
647     */
648    private void
649        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
650        throws IOException {
651        cfhBbuf.rewind();
652        IOUtils.readFully(archive, cfhBbuf);
653        int off = 0;
654        final Entry ze = new Entry();
655
656        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
657        off += SHORT;
658        ze.setVersionMadeBy(versionMadeBy);
659        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
660
661        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
662        off += SHORT; // version required
663
664        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
665        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
666        final ZipEncoding entryEncoding =
667            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
668        if (hasUTF8Flag) {
669            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
670        }
671        ze.setGeneralPurposeBit(gpFlag);
672        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
673
674        off += SHORT;
675
676        //noinspection MagicConstant
677        ze.setMethod(ZipShort.getValue(cfhBuf, off));
678        off += SHORT;
679
680        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
681        ze.setTime(time);
682        off += WORD;
683
684        ze.setCrc(ZipLong.getValue(cfhBuf, off));
685        off += WORD;
686
687        ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
688        off += WORD;
689
690        ze.setSize(ZipLong.getValue(cfhBuf, off));
691        off += WORD;
692
693        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
694        off += SHORT;
695
696        final int extraLen = ZipShort.getValue(cfhBuf, off);
697        off += SHORT;
698
699        final int commentLen = ZipShort.getValue(cfhBuf, off);
700        off += SHORT;
701
702        final int diskStart = ZipShort.getValue(cfhBuf, off);
703        off += SHORT;
704
705        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
706        off += SHORT;
707
708        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
709        off += WORD;
710
711        final byte[] fileName = new byte[fileNameLen];
712        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
713        ze.setName(entryEncoding.decode(fileName), fileName);
714
715        // LFH offset,
716        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
717        // data offset will be filled later
718        entries.add(ze);
719
720        final byte[] cdExtraData = new byte[extraLen];
721        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
722        ze.setCentralDirectoryExtra(cdExtraData);
723
724        setSizesAndOffsetFromZip64Extra(ze, diskStart);
725
726        final byte[] comment = new byte[commentLen];
727        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
728        ze.setComment(entryEncoding.decode(comment));
729
730        if (!hasUTF8Flag && useUnicodeExtraFields) {
731            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
732        }
733    }
734
735    /**
736     * If the entry holds a Zip64 extended information extra field,
737     * read sizes from there if the entry's sizes are set to
738     * 0xFFFFFFFFF, do the same for the offset of the local file
739     * header.
740     *
741     * <p>Ensures the Zip64 extra either knows both compressed and
742     * uncompressed size or neither of both as the internal logic in
743     * ExtraFieldUtils forces the field to create local header data
744     * even if they are never used - and here a field with only one
745     * size would be invalid.</p>
746     */
747    private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze,
748                                                 final int diskStart)
749        throws IOException {
750        final Zip64ExtendedInformationExtraField z64 =
751            (Zip64ExtendedInformationExtraField)
752            ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
753        if (z64 != null) {
754            final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
755            final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
756            final boolean hasRelativeHeaderOffset =
757                ze.getLocalHeaderOffset() == ZIP64_MAGIC;
758            z64.reparseCentralDirectoryData(hasUncompressedSize,
759                                            hasCompressedSize,
760                                            hasRelativeHeaderOffset,
761                                            diskStart == ZIP64_MAGIC_SHORT);
762
763            if (hasUncompressedSize) {
764                ze.setSize(z64.getSize().getLongValue());
765            } else if (hasCompressedSize) {
766                z64.setSize(new ZipEightByteInteger(ze.getSize()));
767            }
768
769            if (hasCompressedSize) {
770                ze.setCompressedSize(z64.getCompressedSize().getLongValue());
771            } else if (hasUncompressedSize) {
772                z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
773            }
774
775            if (hasRelativeHeaderOffset) {
776                ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
777            }
778        }
779    }
780
781    /**
782     * Length of the "End of central directory record" - which is
783     * supposed to be the last structure of the archive - without file
784     * comment.
785     */
786    static final int MIN_EOCD_SIZE =
787        /* end of central dir signature    */ WORD
788        /* number of this disk             */ + SHORT
789        /* number of the disk with the     */
790        /* start of the central directory  */ + SHORT
791        /* total number of entries in      */
792        /* the central dir on this disk    */ + SHORT
793        /* total number of entries in      */
794        /* the central dir                 */ + SHORT
795        /* size of the central directory   */ + WORD
796        /* offset of start of central      */
797        /* directory with respect to       */
798        /* the starting disk number        */ + WORD
799        /* zipfile comment length          */ + SHORT;
800
801    /**
802     * Maximum length of the "End of central directory record" with a
803     * file comment.
804     */
805    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
806        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
807
808    /**
809     * Offset of the field that holds the location of the first
810     * central directory entry inside the "End of central directory
811     * record" relative to the start of the "End of central directory
812     * record".
813     */
814    private static final int CFD_LOCATOR_OFFSET =
815        /* end of central dir signature    */ WORD
816        /* number of this disk             */ + SHORT
817        /* number of the disk with the     */
818        /* start of the central directory  */ + SHORT
819        /* total number of entries in      */
820        /* the central dir on this disk    */ + SHORT
821        /* total number of entries in      */
822        /* the central dir                 */ + SHORT
823        /* size of the central directory   */ + WORD;
824
825    /**
826     * Length of the "Zip64 end of central directory locator" - which
827     * should be right in front of the "end of central directory
828     * record" if one is present at all.
829     */
830    private static final int ZIP64_EOCDL_LENGTH =
831        /* zip64 end of central dir locator sig */ WORD
832        /* number of the disk with the start    */
833        /* start of the zip64 end of            */
834        /* central directory                    */ + WORD
835        /* relative offset of the zip64         */
836        /* end of central directory record      */ + DWORD
837        /* total number of disks                */ + WORD;
838
839    /**
840     * Offset of the field that holds the location of the "Zip64 end
841     * of central directory record" inside the "Zip64 end of central
842     * directory locator" relative to the start of the "Zip64 end of
843     * central directory locator".
844     */
845    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
846        /* zip64 end of central dir locator sig */ WORD
847        /* number of the disk with the start    */
848        /* start of the zip64 end of            */
849        /* central directory                    */ + WORD;
850
851    /**
852     * Offset of the field that holds the location of the first
853     * central directory entry inside the "Zip64 end of central
854     * directory record" relative to the start of the "Zip64 end of
855     * central directory record".
856     */
857    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
858        /* zip64 end of central dir        */
859        /* signature                       */ WORD
860        /* size of zip64 end of central    */
861        /* directory record                */ + DWORD
862        /* version made by                 */ + SHORT
863        /* version needed to extract       */ + SHORT
864        /* number of this disk             */ + WORD
865        /* number of the disk with the     */
866        /* start of the central directory  */ + WORD
867        /* total number of entries in the  */
868        /* central directory on this disk  */ + DWORD
869        /* total number of entries in the  */
870        /* central directory               */ + DWORD
871        /* size of the central directory   */ + DWORD;
872
873    /**
874     * Searches for either the &quot;Zip64 end of central directory
875     * locator&quot; or the &quot;End of central dir record&quot;, parses
876     * it and positions the stream at the first central directory
877     * record.
878     */
879    private void positionAtCentralDirectory()
880        throws IOException {
881        positionAtEndOfCentralDirectoryRecord();
882        boolean found = false;
883        final boolean searchedForZip64EOCD =
884            archive.position() > ZIP64_EOCDL_LENGTH;
885        if (searchedForZip64EOCD) {
886            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
887            wordBbuf.rewind();
888            IOUtils.readFully(archive, wordBbuf);
889            found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
890                                  wordBuf);
891        }
892        if (!found) {
893            // not a ZIP64 archive
894            if (searchedForZip64EOCD) {
895                skipBytes(ZIP64_EOCDL_LENGTH - WORD);
896            }
897            positionAtCentralDirectory32();
898        } else {
899            positionAtCentralDirectory64();
900        }
901    }
902
903    /**
904     * Parses the &quot;Zip64 end of central directory locator&quot;,
905     * finds the &quot;Zip64 end of central directory record&quot; using the
906     * parsed information, parses that and positions the stream at the
907     * first central directory record.
908     *
909     * Expects stream to be positioned right behind the &quot;Zip64
910     * end of central directory locator&quot;'s signature.
911     */
912    private void positionAtCentralDirectory64()
913        throws IOException {
914        skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
915                  - WORD /* signature has already been read */);
916        dwordBbuf.rewind();
917        IOUtils.readFully(archive, dwordBbuf);
918        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
919        wordBbuf.rewind();
920        IOUtils.readFully(archive, wordBbuf);
921        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
922            throw new ZipException("archive's ZIP64 end of central "
923                                   + "directory locator is corrupt.");
924        }
925        skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
926                  - WORD /* signature has already been read */);
927        dwordBbuf.rewind();
928        IOUtils.readFully(archive, dwordBbuf);
929        archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
930    }
931
932    /**
933     * Parses the &quot;End of central dir record&quot; and positions
934     * the stream at the first central directory record.
935     *
936     * Expects stream to be positioned at the beginning of the
937     * &quot;End of central dir record&quot;.
938     */
939    private void positionAtCentralDirectory32()
940        throws IOException {
941        skipBytes(CFD_LOCATOR_OFFSET);
942        wordBbuf.rewind();
943        IOUtils.readFully(archive, wordBbuf);
944        archive.position(ZipLong.getValue(wordBuf));
945    }
946
947    /**
948     * Searches for the and positions the stream at the start of the
949     * &quot;End of central dir record&quot;.
950     */
951    private void positionAtEndOfCentralDirectoryRecord()
952        throws IOException {
953        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE,
954                                             ZipArchiveOutputStream.EOCD_SIG);
955        if (!found) {
956            throw new ZipException("archive is not a ZIP archive");
957        }
958    }
959
960    /**
961     * Searches the archive backwards from minDistance to maxDistance
962     * for the given signature, positions the RandomaccessFile right
963     * at the signature if it has been found.
964     */
965    private boolean tryToLocateSignature(final long minDistanceFromEnd,
966                                         final long maxDistanceFromEnd,
967                                         final byte[] sig) throws IOException {
968        boolean found = false;
969        long off = archive.size() - minDistanceFromEnd;
970        final long stopSearching =
971            Math.max(0L, archive.size() - maxDistanceFromEnd);
972        if (off >= 0) {
973            for (; off >= stopSearching; off--) {
974                archive.position(off);
975                try {
976                    wordBbuf.rewind();
977                    IOUtils.readFully(archive, wordBbuf);
978                    wordBbuf.flip();
979                } catch (EOFException ex) {
980                    break;
981                }
982                int curr = wordBbuf.get();
983                if (curr == sig[POS_0]) {
984                    curr = wordBbuf.get();
985                    if (curr == sig[POS_1]) {
986                        curr = wordBbuf.get();
987                        if (curr == sig[POS_2]) {
988                            curr = wordBbuf.get();
989                            if (curr == sig[POS_3]) {
990                                found = true;
991                                break;
992                            }
993                        }
994                    }
995                }
996            }
997        }
998        if (found) {
999            archive.position(off);
1000        }
1001        return found;
1002    }
1003
1004    /**
1005     * Skips the given number of bytes or throws an EOFException if
1006     * skipping failed.
1007     */
1008    private void skipBytes(final int count) throws IOException {
1009        long currentPosition = archive.position();
1010        long newPosition = currentPosition + count;
1011        if (newPosition > archive.size()) {
1012            throw new EOFException();
1013        }
1014        archive.position(newPosition);
1015    }
1016
1017    /**
1018     * Number of bytes in local file header up to the &quot;length of
1019     * filename&quot; entry.
1020     */
1021    private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
1022        /* local file header signature     */ WORD
1023        /* version needed to extract       */ + SHORT
1024        /* general purpose bit flag        */ + SHORT
1025        /* compression method              */ + SHORT
1026        /* last mod file time              */ + SHORT
1027        /* last mod file date              */ + SHORT
1028        /* crc-32                          */ + WORD
1029        /* compressed size                 */ + WORD
1030        /* uncompressed size               */ + (long) WORD;
1031
1032    /**
1033     * Walks through all recorded entries and adds the data available
1034     * from the local file header.
1035     *
1036     * <p>Also records the offsets for the data to read from the
1037     * entries.</p>
1038     */
1039    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment>
1040                                            entriesWithoutUTF8Flag)
1041        throws IOException {
1042        for (final ZipArchiveEntry zipArchiveEntry : entries) {
1043            // entries is filled in populateFromCentralDirectory and
1044            // never modified
1045            final Entry ze = (Entry) zipArchiveEntry;
1046            final long offset = ze.getLocalHeaderOffset();
1047            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
1048            wordBbuf.rewind();
1049            IOUtils.readFully(archive, wordBbuf);
1050            wordBbuf.flip();
1051            wordBbuf.get(shortBuf);
1052            final int fileNameLen = ZipShort.getValue(shortBuf);
1053            wordBbuf.get(shortBuf);
1054            final int extraFieldLen = ZipShort.getValue(shortBuf);
1055            skipBytes(fileNameLen);
1056            final byte[] localExtraData = new byte[extraFieldLen];
1057            IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
1058            ze.setExtra(localExtraData);
1059            ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
1060                + SHORT + SHORT + fileNameLen + extraFieldLen);
1061            ze.setStreamContiguous(true);
1062
1063            if (entriesWithoutUTF8Flag.containsKey(ze)) {
1064                final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
1065                ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
1066                                                         nc.comment);
1067            }
1068
1069            final String name = ze.getName();
1070            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
1071            if (entriesOfThatName == null) {
1072                entriesOfThatName = new LinkedList<>();
1073                nameMap.put(name, entriesOfThatName);
1074            }
1075            entriesOfThatName.addLast(ze);
1076        }
1077    }
1078
1079    /**
1080     * Checks whether the archive starts with a LFH.  If it doesn't,
1081     * it may be an empty archive.
1082     */
1083    private boolean startsWithLocalFileHeader() throws IOException {
1084        archive.position(0);
1085        wordBbuf.rewind();
1086        IOUtils.readFully(archive, wordBbuf);
1087        return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
1088    }
1089
1090    /**
1091     * Creates new BoundedInputStream, according to implementation of
1092     * underlying archive channel.
1093     */
1094    private BoundedInputStream createBoundedInputStream(long start, long remaining) {
1095        return archive instanceof FileChannel ?
1096            new BoundedFileChannelInputStream(start, remaining) :
1097            new BoundedInputStream(start, remaining);
1098    }
1099
1100    /**
1101     * InputStream that delegates requests to the underlying
1102     * SeekableByteChannel, making sure that only bytes from a certain
1103     * range can be read.
1104     */
1105    private class BoundedInputStream extends InputStream {
1106        private ByteBuffer singleByteBuffer;
1107        private final long end;
1108        private long loc;
1109
1110        BoundedInputStream(final long start, final long remaining) {
1111            this.end = start+remaining;
1112            if (this.end < start) {
1113                // check for potential vulnerability due to overflow
1114                throw new IllegalArgumentException("Invalid length of stream at offset="+start+", length="+remaining);
1115            }
1116            loc = start;
1117        }
1118
1119        @Override
1120        public synchronized int read() throws IOException {
1121            if (loc >= end) {
1122                return -1;
1123            }
1124            if (singleByteBuffer == null) {
1125                singleByteBuffer = ByteBuffer.allocate(1);
1126            }
1127            else {
1128                singleByteBuffer.rewind();
1129            }
1130            int read = read(loc, singleByteBuffer);
1131            if (read < 0) {
1132                return read;
1133            }
1134            loc++;
1135            return singleByteBuffer.get() & 0xff;
1136        }
1137
1138        @Override
1139        public synchronized int read(final byte[] b, final int off, int len) throws IOException {
1140            if (len <= 0) {
1141                return 0;
1142            }
1143
1144            if (len > end-loc) {
1145                if (loc >= end) {
1146                    return -1;
1147                }
1148                len = (int)(end-loc);
1149            }
1150
1151            ByteBuffer buf;
1152            buf = ByteBuffer.wrap(b, off, len);
1153            int ret = read(loc, buf);
1154            if (ret > 0) {
1155                loc += ret;
1156                return ret;
1157            }
1158            return ret;
1159        }
1160
1161        protected int read(long pos, ByteBuffer buf) throws IOException {
1162            int read;
1163            synchronized (archive) {
1164                archive.position(pos);
1165                read = archive.read(buf);
1166            }
1167            buf.flip();
1168            return read;
1169        }
1170    }
1171
1172    /**
1173     * Lock-free implementation of BoundedInputStream. The
1174     * implementation uses positioned reads on the underlying archive
1175     * file channel and therefore performs significantly faster in
1176     * concurrent environment.
1177     */
1178    private class BoundedFileChannelInputStream extends BoundedInputStream {
1179        private final FileChannel archive;
1180
1181        BoundedFileChannelInputStream(final long start, final long remaining) {
1182            super(start, remaining);
1183            archive = (FileChannel)ZipFile.this.archive;
1184        }
1185
1186        @Override
1187        protected int read(long pos, ByteBuffer buf) throws IOException {
1188            int read = archive.read(buf, pos);
1189            buf.flip();
1190            return read;
1191        }
1192    }
1193
1194    private static final class NameAndComment {
1195        private final byte[] name;
1196        private final byte[] comment;
1197        private NameAndComment(final byte[] name, final byte[] comment) {
1198            this.name = name;
1199            this.comment = comment;
1200        }
1201    }
1202
1203    /**
1204     * Compares two ZipArchiveEntries based on their offset within the archive.
1205     *
1206     * <p>Won't return any meaningful results if one of the entries
1207     * isn't part of the archive at all.</p>
1208     *
1209     * @since 1.1
1210     */
1211    private final Comparator<ZipArchiveEntry> offsetComparator =
1212        new Comparator<ZipArchiveEntry>() {
1213        @Override
1214        public int compare(final ZipArchiveEntry e1, final ZipArchiveEntry e2) {
1215            if (e1 == e2) {
1216                return 0;
1217            }
1218
1219            final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
1220            final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
1221            if (ent1 == null) {
1222                return 1;
1223            }
1224            if (ent2 == null) {
1225                return -1;
1226            }
1227            final long val = (ent1.getLocalHeaderOffset()
1228                        - ent2.getLocalHeaderOffset());
1229            return val == 0 ? 0 : val < 0 ? -1 : +1;
1230        }
1231    };
1232
1233    /**
1234     * Extends ZipArchiveEntry to store the offset within the archive.
1235     */
1236    private static class Entry extends ZipArchiveEntry {
1237
1238        Entry() {
1239        }
1240
1241        @Override
1242        public int hashCode() {
1243            return 3 * super.hashCode()
1244                + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32);
1245        }
1246
1247        @Override
1248        public boolean equals(final Object other) {
1249            if (super.equals(other)) {
1250                // super.equals would return false if other were not an Entry
1251                final Entry otherEntry = (Entry) other;
1252                return getLocalHeaderOffset()
1253                        == otherEntry.getLocalHeaderOffset()
1254                    && getDataOffset()
1255                        == otherEntry.getDataOffset();
1256            }
1257            return false;
1258        }
1259    }
1260}