/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * The documentation does not seem to give any hint about the encoding
 * of string values. Given that the main purpose of dump/restore is
 * backing up a system, it seems very likely that the format uses the
 * system's current default encoding.
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for filenames and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is the stream to read from
     * @throws ArchiveException if this is not a dump archive or an I/O error occurs
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this(is, null);
    }
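    // A minimal usage sketch (the file name here is hypothetical; any
    // InputStream containing a dump archive works):
    //
    //   DumpArchiveInputStream dump =
    //       new DumpArchiveInputStream(new FileInputStream("backup.dump"));
    //   DumpArchiveEntry entry;
    //   while ((entry = dump.getNextEntry()) != null) {
    //       // entry.getName() holds the reconstructed full path; read()
    //       // then returns that entry's data until its per-entry EOF.
    //   }
    //   dump.close();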
    /**
     * Constructor.
     *
     * @param is the stream to read from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @throws ArchiveException if this is not a dump archive or an I/O error occurs
     * @since 1.6
     */
    public DumpArchiveInputStream(InputStream is, String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use a priority queue to ensure that parent directories are
        // released first.
        queue = new PriorityQueue<DumpArchiveEntry>(10,
                new Comparator<DumpArchiveEntry>() {
                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
                        // entries that don't have a resolved name yet
                        // can't be ordered meaningfully.
                        if (p.getOriginalName() == null || q.getOriginalName() == null) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     *
     * @return the summary information for this archive
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read BITS segment.
     */
    private void readBITS() throws IOException {
        byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry.
     *
     * @return the next entry, or null if there are no more entries
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }
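    // The walk performed by getNextEntry() below, as implied by the segment
    // types this class handles: the archive opens with the summary header
    // parsed in the constructor, followed by the CLRI and BITS segments
    // skipped there, then one INODE segment per file (directories carry
    // their dirent data inline, large files are continued by ADDR
    // segments), and finally an END segment marking end-of-volume.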
    /**
     * Read the next entry.
     *
     * @return the next entry, or null if there are no more entries
     * @throws IOException on error
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for the prior
            // file. We might still have holes, so it's easiest to do it
            // block by block. We may want to revisit this if the
            // unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for the prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip(DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }
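    // Layout of one directory record as decoded field by field in
    // readDirectoryEntry() below (the classic BSD on-disk dirent):
    //
    //   offset 0   int32   d_ino      inode number
    //   offset 4   int16   d_reclen   length of this record
    //   offset 6   byte    d_type     file type (4 == directory)
    //   offset 7   byte    d_namlen   length of the name
    //   offset 8   byte[]  d_name     the name, d_namlen bytes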
    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
               DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                 i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                // a corrupt record length of zero would loop forever.
                if (reclen == 0) {
                    throw new InvalidFormatException();
                }

                byte type = blockBuffer[i + 6];

                String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                /*
                if ((type == 4) && names.containsKey(ino)) {
                    System.out.println("we already have ino: " +
                                       names.get(ino));
                }
                */

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue().setSimpleName(
                                names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(e.getIno());
                }
            }

            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the entry to resolve
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're still
        // missing an intermediate value; if so we bail out below and
        // defer this entry until the missing dirent shows up.
        Stack<String> elements = new Stack<String>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing, defer the work and read the next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }
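    // A worked example of getPath() with hypothetical inode numbers: if
    // names maps 12 -> ("c", parent 7), 7 -> ("b", parent 2) and 2 is the
    // root dirent (".", parent 2), then an entry with ino 12 resolves to
    // "./b/c". If the dirent for ino 7 had not been seen yet, the entry
    // would instead be parked in "pending" and getPath() would return null.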
    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            int sz = len > readBuf.length - recordOffset
                ? readBuf.length - recordOffset : len;

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                // 512 records is the most a single segment header can
                // describe (TP_NINDIR in dump.h), so a new segment
                // header must follow.
                if (readIdx >= 512) {
                    byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    // sparse records represent a hole: all zeros.
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the archive stream.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     *
     * @param buffer the bytes to check
     * @param length the number of valid bytes in the buffer
     * @return true if this looks like a dump archive
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
                24);
    }
}