001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018 019 /* 020 * This package is based on the work done by Timothy Gerard Endres 021 * (time@ice.com) to whom the Ant project is very grateful for his great code. 022 */ 023 024 package org.apache.commons.compress.archivers.tar; 025 026 import java.io.IOException; 027 import java.io.InputStream; 028 import org.apache.commons.compress.archivers.ArchiveEntry; 029 import org.apache.commons.compress.archivers.ArchiveInputStream; 030 import org.apache.commons.compress.utils.ArchiveUtils; 031 032 /** 033 * The TarInputStream reads a UNIX tar archive as an InputStream. 034 * methods are provided to position at each successive entry in 035 * the archive, and the read each entry as a normal input stream 036 * using read(). 037 * @NotThreadSafe 038 */ 039 public class TarArchiveInputStream extends ArchiveInputStream { 040 private static final int SMALL_BUFFER_SIZE = 256; 041 private static final int BUFFER_SIZE = 8 * 1024; 042 043 private boolean hasHitEOF; 044 private long entrySize; 045 private long entryOffset; 046 private byte[] readBuf; 047 protected final TarBuffer buffer; 048 private TarArchiveEntry currEntry; 049 050 /** 051 * Constructor for TarInputStream. 052 * @param is the input stream to use 053 */ 054 public TarArchiveInputStream(InputStream is) { 055 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); 056 } 057 058 /** 059 * Constructor for TarInputStream. 060 * @param is the input stream to use 061 * @param blockSize the block size to use 062 */ 063 public TarArchiveInputStream(InputStream is, int blockSize) { 064 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE); 065 } 066 067 /** 068 * Constructor for TarInputStream. 069 * @param is the input stream to use 070 * @param blockSize the block size to use 071 * @param recordSize the record size to use 072 */ 073 public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) { 074 this.buffer = new TarBuffer(is, blockSize, recordSize); 075 this.readBuf = null; 076 this.hasHitEOF = false; 077 } 078 079 /** 080 * Closes this stream. Calls the TarBuffer's close() method. 081 * @throws IOException on error 082 */ 083 public void close() throws IOException { 084 buffer.close(); 085 } 086 087 /** 088 * Get the record size being used by this stream's TarBuffer. 089 * 090 * @return The TarBuffer record size. 091 */ 092 public int getRecordSize() { 093 return buffer.getRecordSize(); 094 } 095 096 /** 097 * Get the available data that can be read from the current 098 * entry in the archive. This does not indicate how much data 099 * is left in the entire archive, only in the current entry. 100 * This value is determined from the entry's size header field 101 * and the amount of data already read from the current entry. 102 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE 103 * bytes are left in the current entry in the archive. 104 * 105 * @return The number of available bytes for the current entry. 106 * @throws IOException for signature 107 */ 108 public int available() throws IOException { 109 if (entrySize - entryOffset > Integer.MAX_VALUE) { 110 return Integer.MAX_VALUE; 111 } 112 return (int) (entrySize - entryOffset); 113 } 114 115 /** 116 * Skip bytes in the input buffer. This skips bytes in the 117 * current entry's data, not the entire archive, and will 118 * stop at the end of the current entry's data if the number 119 * to skip extends beyond that point. 120 * 121 * @param numToSkip The number of bytes to skip. 122 * @return the number actually skipped 123 * @throws IOException on error 124 */ 125 public long skip(long numToSkip) throws IOException { 126 // REVIEW 127 // This is horribly inefficient, but it ensures that we 128 // properly skip over bytes via the TarBuffer... 129 // 130 byte[] skipBuf = new byte[BUFFER_SIZE]; 131 long skip = numToSkip; 132 while (skip > 0) { 133 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip); 134 int numRead = read(skipBuf, 0, realSkip); 135 if (numRead == -1) { 136 break; 137 } 138 skip -= numRead; 139 } 140 return (numToSkip - skip); 141 } 142 143 /** 144 * Since we do not support marking just yet, we do nothing. 145 */ 146 public void reset() { 147 } 148 149 /** 150 * Get the next entry in this tar archive. This will skip 151 * over any remaining data in the current entry, if there 152 * is one, and place the input stream at the header of the 153 * next entry, and read the header and instantiate a new 154 * TarEntry from the header bytes and return that entry. 155 * If there are no more entries in the archive, null will 156 * be returned to indicate that the end of the archive has 157 * been reached. 158 * 159 * @return The next TarEntry in the archive, or null. 160 * @throws IOException on error 161 */ 162 public TarArchiveEntry getNextTarEntry() throws IOException { 163 if (hasHitEOF) { 164 return null; 165 } 166 167 if (currEntry != null) { 168 long numToSkip = entrySize - entryOffset; 169 170 while (numToSkip > 0) { 171 long skipped = skip(numToSkip); 172 if (skipped <= 0) { 173 throw new RuntimeException("failed to skip current tar entry"); 174 } 175 numToSkip -= skipped; 176 } 177 178 readBuf = null; 179 } 180 181 byte[] headerBuf = buffer.readRecord(); 182 183 if (headerBuf == null) { 184 hasHitEOF = true; 185 } else if (buffer.isEOFRecord(headerBuf)) { 186 hasHitEOF = true; 187 } 188 189 if (hasHitEOF) { 190 currEntry = null; 191 } else { 192 currEntry = new TarArchiveEntry(headerBuf); 193 entryOffset = 0; 194 entrySize = currEntry.getSize(); 195 } 196 197 if (currEntry != null && currEntry.isGNULongNameEntry()) { 198 // read in the name 199 StringBuffer longName = new StringBuffer(); 200 byte[] buf = new byte[SMALL_BUFFER_SIZE]; 201 int length = 0; 202 while ((length = read(buf)) >= 0) { 203 longName.append(new String(buf, 0, length)); 204 } 205 getNextEntry(); 206 if (currEntry == null) { 207 // Bugzilla: 40334 208 // Malformed tar file - long entry name not followed by entry 209 return null; 210 } 211 // remove trailing null terminator 212 if (longName.length() > 0 213 && longName.charAt(longName.length() - 1) == 0) { 214 longName.deleteCharAt(longName.length() - 1); 215 } 216 currEntry.setName(longName.toString()); 217 } 218 219 return currEntry; 220 } 221 222 public ArchiveEntry getNextEntry() throws IOException { 223 return getNextTarEntry(); 224 } 225 226 /** 227 * Reads bytes from the current tar archive entry. 228 * 229 * This method is aware of the boundaries of the current 230 * entry in the archive and will deal with them as if they 231 * were this stream's start and EOF. 232 * 233 * @param buf The buffer into which to place bytes read. 234 * @param offset The offset at which to place bytes read. 235 * @param numToRead The number of bytes to read. 236 * @return The number of bytes read, or -1 at EOF. 237 * @throws IOException on error 238 */ 239 public int read(byte[] buf, int offset, int numToRead) throws IOException { 240 int totalRead = 0; 241 242 if (entryOffset >= entrySize) { 243 return -1; 244 } 245 246 if ((numToRead + entryOffset) > entrySize) { 247 numToRead = (int) (entrySize - entryOffset); 248 } 249 250 if (readBuf != null) { 251 int sz = (numToRead > readBuf.length) ? readBuf.length 252 : numToRead; 253 254 System.arraycopy(readBuf, 0, buf, offset, sz); 255 256 if (sz >= readBuf.length) { 257 readBuf = null; 258 } else { 259 int newLen = readBuf.length - sz; 260 byte[] newBuf = new byte[newLen]; 261 262 System.arraycopy(readBuf, sz, newBuf, 0, newLen); 263 264 readBuf = newBuf; 265 } 266 267 totalRead += sz; 268 numToRead -= sz; 269 offset += sz; 270 } 271 272 while (numToRead > 0) { 273 byte[] rec = buffer.readRecord(); 274 275 if (rec == null) { 276 // Unexpected EOF! 277 throw new IOException("unexpected EOF with " + numToRead 278 + " bytes unread. Occured at byte: " + getCount()); 279 } 280 count(rec.length); 281 int sz = numToRead; 282 int recLen = rec.length; 283 284 if (recLen > sz) { 285 System.arraycopy(rec, 0, buf, offset, sz); 286 287 readBuf = new byte[recLen - sz]; 288 289 System.arraycopy(rec, sz, readBuf, 0, recLen - sz); 290 } else { 291 sz = recLen; 292 293 System.arraycopy(rec, 0, buf, offset, recLen); 294 } 295 296 totalRead += sz; 297 numToRead -= sz; 298 offset += sz; 299 } 300 301 entryOffset += totalRead; 302 303 return totalRead; 304 } 305 306 protected final TarArchiveEntry getCurrentEntry() { 307 return currEntry; 308 } 309 310 protected final void setCurrentEntry(TarArchiveEntry e) { 311 currEntry = e; 312 } 313 314 protected final boolean isAtEOF() { 315 return hasHitEOF; 316 } 317 318 protected final void setAtEOF(boolean b) { 319 hasHitEOF = b; 320 } 321 322 // ArchiveInputStream 323 324 public static boolean matches(byte[] signature, int length) { 325 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) { 326 return false; 327 } 328 329 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, 330 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 331 && 332 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, 333 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 334 ){ 335 return true; 336 } 337 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, 338 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) 339 && 340 ( 341 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, 342 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 343 || 344 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, 345 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) 346 ) 347 ){ 348 return true; 349 } 350 return false; 351 } 352 353 }