001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 package org.apache.commons.compress.archivers.zip; 019 020 import java.io.File; 021 import java.io.IOException; 022 import java.io.InputStream; 023 import java.io.RandomAccessFile; 024 import java.util.Collections; 025 import java.util.Enumeration; 026 import java.util.HashMap; 027 import java.util.Map; 028 import java.util.zip.Inflater; 029 import java.util.zip.InflaterInputStream; 030 import java.util.zip.ZipException; 031 032 /** 033 * Replacement for <code>java.util.ZipFile</code>. 034 * 035 * <p>This class adds support for file name encodings other than UTF-8 036 * (which is required to work on ZIP files created by native zip tools 037 * and is able to skip a preamble like the one found in self 038 * extracting archives. Furthermore it returns instances of 039 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 040 * instead of <code>java.util.zip.ZipEntry</code>.</p> 041 * 042 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 043 * have to reimplement all methods anyway. Like 044 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the 045 * covers and supports compressed and uncompressed entries.</p> 046 * 047 * <p>The method signatures mimic the ones of 048 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 049 * 050 * <ul> 051 * <li>There is no getName method.</li> 052 * <li>entries has been renamed to getEntries.</li> 053 * <li>getEntries and getEntry return 054 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 055 * instances.</li> 056 * <li>close is allowed to throw IOException.</li> 057 * </ul> 058 * 059 */ 060 public class ZipFile { 061 private static final int HASH_SIZE = 509; 062 private static final int SHORT = 2; 063 private static final int WORD = 4; 064 static final int NIBLET_MASK = 0x0f; 065 static final int BYTE_SHIFT = 8; 066 private static final int POS_0 = 0; 067 private static final int POS_1 = 1; 068 private static final int POS_2 = 2; 069 private static final int POS_3 = 3; 070 071 /** 072 * Maps ZipArchiveEntrys to Longs, recording the offsets of the local 073 * file headers. 074 */ 075 private final Map entries = new HashMap(HASH_SIZE); 076 077 /** 078 * Maps String to ZipArchiveEntrys, name -> actual entry. 079 */ 080 private final Map nameMap = new HashMap(HASH_SIZE); 081 082 private static final class OffsetEntry { 083 private long headerOffset = -1; 084 private long dataOffset = -1; 085 } 086 087 /** 088 * The encoding to use for filenames and the file comment. 089 * 090 * <p>For a list of possible values see <a 091 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 092 * Defaults to UTF-8.</p> 093 */ 094 private final String encoding; 095 096 /** 097 * The zip encoding to use for filenames and the file comment. 098 */ 099 private final ZipEncoding zipEncoding; 100 101 /** 102 * The actual data source. 103 */ 104 private final RandomAccessFile archive; 105 106 /** 107 * Whether to look for and use Unicode extra fields. 108 */ 109 private final boolean useUnicodeExtraFields; 110 111 /** 112 * Opens the given file for reading, assuming "UTF8" for file names. 113 * 114 * @param f the archive. 115 * 116 * @throws IOException if an error occurs while reading the file. 117 */ 118 public ZipFile(File f) throws IOException { 119 this(f, ZipEncodingHelper.UTF8); 120 } 121 122 /** 123 * Opens the given file for reading, assuming "UTF8". 124 * 125 * @param name name of the archive. 126 * 127 * @throws IOException if an error occurs while reading the file. 128 */ 129 public ZipFile(String name) throws IOException { 130 this(new File(name), ZipEncodingHelper.UTF8); 131 } 132 133 /** 134 * Opens the given file for reading, assuming the specified 135 * encoding for file names, scanning unicode extra fields. 136 * 137 * @param name name of the archive. 138 * @param encoding the encoding to use for file names, use null 139 * for the platform's default encoding 140 * 141 * @throws IOException if an error occurs while reading the file. 142 */ 143 public ZipFile(String name, String encoding) throws IOException { 144 this(new File(name), encoding, true); 145 } 146 147 /** 148 * Opens the given file for reading, assuming the specified 149 * encoding for file names and scanning for unicode extra fields. 150 * 151 * @param f the archive. 152 * @param encoding the encoding to use for file names, use null 153 * for the platform's default encoding 154 * 155 * @throws IOException if an error occurs while reading the file. 156 */ 157 public ZipFile(File f, String encoding) throws IOException { 158 this(f, encoding, true); 159 } 160 161 /** 162 * Opens the given file for reading, assuming the specified 163 * encoding for file names. 164 * 165 * @param f the archive. 166 * @param encoding the encoding to use for file names, use null 167 * for the platform's default encoding 168 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 169 * Extra Fields (if present) to set the file names. 170 * 171 * @throws IOException if an error occurs while reading the file. 172 */ 173 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) 174 throws IOException { 175 this.encoding = encoding; 176 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 177 this.useUnicodeExtraFields = useUnicodeExtraFields; 178 archive = new RandomAccessFile(f, "r"); 179 boolean success = false; 180 try { 181 Map entriesWithoutEFS = populateFromCentralDirectory(); 182 resolveLocalFileHeaderData(entriesWithoutEFS); 183 success = true; 184 } finally { 185 if (!success) { 186 try { 187 archive.close(); 188 } catch (IOException e2) { 189 // swallow, throw the original exception instead 190 } 191 } 192 } 193 } 194 195 /** 196 * The encoding to use for filenames and the file comment. 197 * 198 * @return null if using the platform's default character encoding. 199 */ 200 public String getEncoding() { 201 return encoding; 202 } 203 204 /** 205 * Closes the archive. 206 * @throws IOException if an error occurs closing the archive. 207 */ 208 public void close() throws IOException { 209 archive.close(); 210 } 211 212 /** 213 * close a zipfile quietly; throw no io fault, do nothing 214 * on a null parameter 215 * @param zipfile file to close, can be null 216 */ 217 public static void closeQuietly(ZipFile zipfile) { 218 if (zipfile != null) { 219 try { 220 zipfile.close(); 221 } catch (IOException e) { 222 //ignore 223 } 224 } 225 } 226 227 /** 228 * Returns all entries. 229 * @return all entries as {@link ZipArchiveEntry} instances 230 */ 231 public Enumeration getEntries() { 232 return Collections.enumeration(entries.keySet()); 233 } 234 235 /** 236 * Returns a named entry - or <code>null</code> if no entry by 237 * that name exists. 238 * @param name name of the entry. 239 * @return the ZipArchiveEntry corresponding to the given name - or 240 * <code>null</code> if not present. 241 */ 242 public ZipArchiveEntry getEntry(String name) { 243 return (ZipArchiveEntry) nameMap.get(name); 244 } 245 246 /** 247 * Returns an InputStream for reading the contents of the given entry. 248 * @param ze the entry to get the stream for. 249 * @return a stream to read the entry from. 250 * @throws IOException if unable to create an input stream from the zipenty 251 * @throws ZipException if the zipentry has an unsupported 252 * compression method 253 */ 254 public InputStream getInputStream(ZipArchiveEntry ze) 255 throws IOException, ZipException { 256 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze); 257 if (offsetEntry == null) { 258 return null; 259 } 260 long start = offsetEntry.dataOffset; 261 BoundedInputStream bis = 262 new BoundedInputStream(start, ze.getCompressedSize()); 263 switch (ze.getMethod()) { 264 case ZipArchiveEntry.STORED: 265 return bis; 266 case ZipArchiveEntry.DEFLATED: 267 bis.addDummy(); 268 return new InflaterInputStream(bis, new Inflater(true)); 269 default: 270 throw new ZipException("Found unsupported compression method " 271 + ze.getMethod()); 272 } 273 } 274 275 private static final int CFH_LEN = 276 /* version made by */ SHORT 277 /* version needed to extract */ + SHORT 278 /* general purpose bit flag */ + SHORT 279 /* compression method */ + SHORT 280 /* last mod file time */ + SHORT 281 /* last mod file date */ + SHORT 282 /* crc-32 */ + WORD 283 /* compressed size */ + WORD 284 /* uncompressed size */ + WORD 285 /* filename length */ + SHORT 286 /* extra field length */ + SHORT 287 /* file comment length */ + SHORT 288 /* disk number start */ + SHORT 289 /* internal file attributes */ + SHORT 290 /* external file attributes */ + WORD 291 /* relative offset of local header */ + WORD; 292 293 /** 294 * Reads the central directory of the given archive and populates 295 * the internal tables with ZipArchiveEntry instances. 296 * 297 * <p>The ZipArchiveEntrys will know all data that can be obtained from 298 * the central directory alone, but not the data that requires the 299 * local file header or additional data to be read.</p> 300 * 301 * @return a Map<ZipArchiveEntry, NameAndComment>> of 302 * zipentries that didn't have the language encoding flag set when 303 * read. 304 */ 305 private Map populateFromCentralDirectory() 306 throws IOException { 307 HashMap noEFS = new HashMap(); 308 309 positionAtCentralDirectory(); 310 311 byte[] cfh = new byte[CFH_LEN]; 312 313 byte[] signatureBytes = new byte[WORD]; 314 archive.readFully(signatureBytes); 315 long sig = ZipLong.getValue(signatureBytes); 316 final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 317 if (sig != cfhSig && startsWithLocalFileHeader()) { 318 throw new IOException("central directory is empty, can't expand" 319 + " corrupt archive."); 320 } 321 while (sig == cfhSig) { 322 archive.readFully(cfh); 323 int off = 0; 324 ZipArchiveEntry ze = new ZipArchiveEntry(); 325 326 int versionMadeBy = ZipShort.getValue(cfh, off); 327 off += SHORT; 328 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 329 330 off += SHORT; // skip version info 331 332 final int generalPurposeFlag = ZipShort.getValue(cfh, off); 333 final boolean hasEFS = 334 (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0; 335 final ZipEncoding entryEncoding = 336 hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 337 338 off += SHORT; 339 340 ze.setMethod(ZipShort.getValue(cfh, off)); 341 off += SHORT; 342 343 // FIXME this is actually not very cpu cycles friendly as we are converting from 344 // dos to java while the underlying Sun implementation will convert 345 // from java to dos time for internal storage... 346 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off)); 347 ze.setTime(time); 348 off += WORD; 349 350 ze.setCrc(ZipLong.getValue(cfh, off)); 351 off += WORD; 352 353 ze.setCompressedSize(ZipLong.getValue(cfh, off)); 354 off += WORD; 355 356 ze.setSize(ZipLong.getValue(cfh, off)); 357 off += WORD; 358 359 int fileNameLen = ZipShort.getValue(cfh, off); 360 off += SHORT; 361 362 int extraLen = ZipShort.getValue(cfh, off); 363 off += SHORT; 364 365 int commentLen = ZipShort.getValue(cfh, off); 366 off += SHORT; 367 368 off += SHORT; // disk number 369 370 ze.setInternalAttributes(ZipShort.getValue(cfh, off)); 371 off += SHORT; 372 373 ze.setExternalAttributes(ZipLong.getValue(cfh, off)); 374 off += WORD; 375 376 byte[] fileName = new byte[fileNameLen]; 377 archive.readFully(fileName); 378 ze.setName(entryEncoding.decode(fileName)); 379 380 // LFH offset, 381 OffsetEntry offset = new OffsetEntry(); 382 offset.headerOffset = ZipLong.getValue(cfh, off); 383 // data offset will be filled later 384 entries.put(ze, offset); 385 386 nameMap.put(ze.getName(), ze); 387 388 byte[] cdExtraData = new byte[extraLen]; 389 archive.readFully(cdExtraData); 390 ze.setCentralDirectoryExtra(cdExtraData); 391 392 byte[] comment = new byte[commentLen]; 393 archive.readFully(comment); 394 ze.setComment(entryEncoding.decode(comment)); 395 396 archive.readFully(signatureBytes); 397 sig = ZipLong.getValue(signatureBytes); 398 399 if (!hasEFS && useUnicodeExtraFields) { 400 noEFS.put(ze, new NameAndComment(fileName, comment)); 401 } 402 } 403 return noEFS; 404 } 405 406 private static final int MIN_EOCD_SIZE = 407 /* end of central dir signature */ WORD 408 /* number of this disk */ + SHORT 409 /* number of the disk with the */ 410 /* start of the central directory */ + SHORT 411 /* total number of entries in */ 412 /* the central dir on this disk */ + SHORT 413 /* total number of entries in */ 414 /* the central dir */ + SHORT 415 /* size of the central directory */ + WORD 416 /* offset of start of central */ 417 /* directory with respect to */ 418 /* the starting disk number */ + WORD 419 /* zipfile comment length */ + SHORT; 420 421 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 422 /* maximum length of zipfile comment */ + 0xFFFF; 423 424 private static final int CFD_LOCATOR_OFFSET = 425 /* end of central dir signature */ WORD 426 /* number of this disk */ + SHORT 427 /* number of the disk with the */ 428 /* start of the central directory */ + SHORT 429 /* total number of entries in */ 430 /* the central dir on this disk */ + SHORT 431 /* total number of entries in */ 432 /* the central dir */ + SHORT 433 /* size of the central directory */ + WORD; 434 435 /** 436 * Searches for the "End of central dir record", parses 437 * it and positions the stream at the first central directory 438 * record. 439 */ 440 private void positionAtCentralDirectory() 441 throws IOException { 442 boolean found = false; 443 long off = archive.length() - MIN_EOCD_SIZE; 444 long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE); 445 if (off >= 0) { 446 archive.seek(off); 447 byte[] sig = ZipArchiveOutputStream.EOCD_SIG; 448 int curr = archive.read(); 449 while (off >= stopSearching && curr != -1) { 450 if (curr == sig[POS_0]) { 451 curr = archive.read(); 452 if (curr == sig[POS_1]) { 453 curr = archive.read(); 454 if (curr == sig[POS_2]) { 455 curr = archive.read(); 456 if (curr == sig[POS_3]) { 457 found = true; 458 break; 459 } 460 } 461 } 462 } 463 archive.seek(--off); 464 curr = archive.read(); 465 } 466 } 467 if (!found) { 468 throw new ZipException("archive is not a ZIP archive"); 469 } 470 archive.seek(off + CFD_LOCATOR_OFFSET); 471 byte[] cfdOffset = new byte[WORD]; 472 archive.readFully(cfdOffset); 473 archive.seek(ZipLong.getValue(cfdOffset)); 474 } 475 476 /** 477 * Number of bytes in local file header up to the "length of 478 * filename" entry. 479 */ 480 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 481 /* local file header signature */ WORD 482 /* version needed to extract */ + SHORT 483 /* general purpose bit flag */ + SHORT 484 /* compression method */ + SHORT 485 /* last mod file time */ + SHORT 486 /* last mod file date */ + SHORT 487 /* crc-32 */ + WORD 488 /* compressed size */ + WORD 489 /* uncompressed size */ + WORD; 490 491 /** 492 * Walks through all recorded entries and adds the data available 493 * from the local file header. 494 * 495 * <p>Also records the offsets for the data to read from the 496 * entries.</p> 497 */ 498 private void resolveLocalFileHeaderData(Map entriesWithoutEFS) 499 throws IOException { 500 Enumeration e = getEntries(); 501 while (e.hasMoreElements()) { 502 ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement(); 503 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze); 504 long offset = offsetEntry.headerOffset; 505 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 506 byte[] b = new byte[SHORT]; 507 archive.readFully(b); 508 int fileNameLen = ZipShort.getValue(b); 509 archive.readFully(b); 510 int extraFieldLen = ZipShort.getValue(b); 511 int lenToSkip = fileNameLen; 512 while (lenToSkip > 0) { 513 int skipped = archive.skipBytes(lenToSkip); 514 if (skipped <= 0) { 515 throw new RuntimeException("failed to skip file name in" 516 + " local file header"); 517 } 518 lenToSkip -= skipped; 519 } 520 byte[] localExtraData = new byte[extraFieldLen]; 521 archive.readFully(localExtraData); 522 ze.setExtra(localExtraData); 523 /*dataOffsets.put(ze, 524 new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 525 + SHORT + SHORT + fileNameLen + extraFieldLen)); 526 */ 527 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 528 + SHORT + SHORT + fileNameLen + extraFieldLen; 529 530 if (entriesWithoutEFS.containsKey(ze)) { 531 String orig = ze.getName(); 532 NameAndComment nc = (NameAndComment) entriesWithoutEFS.get(ze); 533 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 534 nc.comment); 535 if (!orig.equals(ze.getName())) { 536 nameMap.remove(orig); 537 nameMap.put(ze.getName(), ze); 538 } 539 } 540 } 541 } 542 543 /** 544 * Checks whether the archive starts with a LFH. If it doesn't, 545 * it may be an empty archive. 546 */ 547 private boolean startsWithLocalFileHeader() throws IOException { 548 archive.seek(0); 549 final byte[] start = new byte[WORD]; 550 archive.readFully(start); 551 for (int i = 0; i < start.length; i++) { 552 if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) { 553 return false; 554 } 555 } 556 return true; 557 } 558 559 /** 560 * InputStream that delegates requests to the underlying 561 * RandomAccessFile, making sure that only bytes from a certain 562 * range can be read. 563 */ 564 private class BoundedInputStream extends InputStream { 565 private long remaining; 566 private long loc; 567 private boolean addDummyByte = false; 568 569 BoundedInputStream(long start, long remaining) { 570 this.remaining = remaining; 571 loc = start; 572 } 573 574 public int read() throws IOException { 575 if (remaining-- <= 0) { 576 if (addDummyByte) { 577 addDummyByte = false; 578 return 0; 579 } 580 return -1; 581 } 582 synchronized (archive) { 583 archive.seek(loc++); 584 return archive.read(); 585 } 586 } 587 588 public int read(byte[] b, int off, int len) throws IOException { 589 if (remaining <= 0) { 590 if (addDummyByte) { 591 addDummyByte = false; 592 b[off] = 0; 593 return 1; 594 } 595 return -1; 596 } 597 598 if (len <= 0) { 599 return 0; 600 } 601 602 if (len > remaining) { 603 len = (int) remaining; 604 } 605 int ret = -1; 606 synchronized (archive) { 607 archive.seek(loc); 608 ret = archive.read(b, off, len); 609 } 610 if (ret > 0) { 611 loc += ret; 612 remaining -= ret; 613 } 614 return ret; 615 } 616 617 /** 618 * Inflater needs an extra dummy byte for nowrap - see 619 * Inflater's javadocs. 620 */ 621 void addDummy() { 622 addDummyByte = true; 623 } 624 } 625 626 private static final class NameAndComment { 627 private final byte[] name; 628 private final byte[] comment; 629 private NameAndComment(byte[] name, byte[] comment) { 630 this.name = name; 631 this.comment = comment; 632 } 633 } 634 }