001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN; 022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET; 023 024import java.io.IOException; 025import java.math.BigInteger; 026import java.nio.ByteBuffer; 027import org.apache.commons.compress.archivers.zip.ZipEncoding; 028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 029 030/** 031 * This class provides static utility methods to work with byte streams. 032 * 033 * @Immutable 034 */ 035// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 036public class TarUtils { 037 038 private static final int BYTE_MASK = 255; 039 040 static final ZipEncoding DEFAULT_ENCODING = 041 ZipEncodingHelper.getZipEncoding(null); 042 043 /** 044 * Encapsulates the algorithms used up to Commons Compress 1.3 as 045 * ZipEncoding. 046 */ 047 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 048 public boolean canEncode(String name) { return true; } 049 050 public ByteBuffer encode(String name) { 051 final int length = name.length(); 052 byte[] buf = new byte[length]; 053 054 // copy until end of input or output is reached. 055 for (int i = 0; i < length; ++i) { 056 buf[i] = (byte) name.charAt(i); 057 } 058 return ByteBuffer.wrap(buf); 059 } 060 061 public String decode(byte[] buffer) { 062 final int length = buffer.length; 063 StringBuilder result = new StringBuilder(length); 064 065 for (byte b : buffer) { 066 if (b == 0) { // Trailing null 067 break; 068 } 069 result.append((char) (b & 0xFF)); // Allow for sign-extension 070 } 071 072 return result.toString(); 073 } 074 }; 075 076 /** Private constructor to prevent instantiation of this utility class. */ 077 private TarUtils(){ 078 } 079 080 /** 081 * Parse an octal string from a buffer. 082 * 083 * <p>Leading spaces are ignored. 084 * The buffer must contain a trailing space or NUL, 085 * and may contain an additional trailing space or NUL.</p> 086 * 087 * <p>The input buffer is allowed to contain all NULs, 088 * in which case the method returns 0L 089 * (this allows for missing fields).</p> 090 * 091 * <p>To work-around some tar implementations that insert a 092 * leading NUL this method returns 0 if it detects a leading NUL 093 * since Commons Compress 1.4.</p> 094 * 095 * @param buffer The buffer from which to parse. 096 * @param offset The offset into the buffer from which to parse. 097 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 098 * @return The long value of the octal string. 099 * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. 100 */ 101 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 102 long result = 0; 103 int end = offset + length; 104 int start = offset; 105 106 if (length < 2){ 107 throw new IllegalArgumentException("Length "+length+" must be at least 2"); 108 } 109 110 if (buffer[start] == 0) { 111 return 0L; 112 } 113 114 // Skip leading spaces 115 while (start < end){ 116 if (buffer[start] == ' '){ 117 start++; 118 } else { 119 break; 120 } 121 } 122 123 // Trim all trailing NULs and spaces. 124 // The ustar and POSIX tar specs require a trailing NUL or 125 // space but some implementations use the extra digit for big 126 // sizes/uids/gids ... 127 byte trailer = buffer[end - 1]; 128 while (start < end && (trailer == 0 || trailer == ' ')) { 129 end--; 130 trailer = buffer[end - 1]; 131 } 132 133 for ( ;start < end; start++) { 134 final byte currentByte = buffer[start]; 135 // CheckStyle:MagicNumber OFF 136 if (currentByte < '0' || currentByte > '7'){ 137 throw new IllegalArgumentException( 138 exceptionMessage(buffer, offset, length, start, currentByte)); 139 } 140 result = (result << 3) + (currentByte - '0'); // convert from ASCII 141 // CheckStyle:MagicNumber ON 142 } 143 144 return result; 145 } 146 147 /** 148 * Compute the value contained in a byte buffer. If the most 149 * significant bit of the first byte in the buffer is set, this 150 * bit is ignored and the rest of the buffer is interpreted as a 151 * binary number. Otherwise, the buffer is interpreted as an 152 * octal number as per the parseOctal function above. 153 * 154 * @param buffer The buffer from which to parse. 155 * @param offset The offset into the buffer from which to parse. 156 * @param length The maximum number of bytes to parse. 157 * @return The long value of the octal or binary string. 158 * @throws IllegalArgumentException if the trailing space/NUL is 159 * missing or an invalid byte is detected in an octal number, or 160 * if a binary number would exceed the size of a signed long 161 * 64-bit integer. 162 * @since 1.4 163 */ 164 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 165 final int length) { 166 167 if ((buffer[offset] & 0x80) == 0) { 168 return parseOctal(buffer, offset, length); 169 } 170 final boolean negative = buffer[offset] == (byte) 0xff; 171 if (length < 9) { 172 return parseBinaryLong(buffer, offset, length, negative); 173 } 174 return parseBinaryBigInteger(buffer, offset, length, negative); 175 } 176 177 private static long parseBinaryLong(final byte[] buffer, final int offset, 178 final int length, 179 final boolean negative) { 180 if (length >= 9) { 181 throw new IllegalArgumentException("At offset " + offset + ", " 182 + length + " byte binary number" 183 + " exceeds maximum signed long" 184 + " value"); 185 } 186 long val = 0; 187 for (int i = 1; i < length; i++) { 188 val = (val << 8) + (buffer[offset + i] & 0xff); 189 } 190 if (negative) { 191 // 2's complement 192 val--; 193 val ^= (long) Math.pow(2, (length - 1) * 8) - 1; 194 } 195 return negative ? -val : val; 196 } 197 198 private static long parseBinaryBigInteger(final byte[] buffer, 199 final int offset, 200 final int length, 201 final boolean negative) { 202 byte[] remainder = new byte[length - 1]; 203 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 204 BigInteger val = new BigInteger(remainder); 205 if (negative) { 206 // 2's complement 207 val = val.add(BigInteger.valueOf(-1)).not(); 208 } 209 if (val.bitLength() > 63) { 210 throw new IllegalArgumentException("At offset " + offset + ", " 211 + length + " byte binary number" 212 + " exceeds maximum signed long" 213 + " value"); 214 } 215 return negative ? -val.longValue() : val.longValue(); 216 } 217 218 /** 219 * Parse a boolean byte from a buffer. 220 * Leading spaces and NUL are ignored. 221 * The buffer may contain trailing spaces or NULs. 222 * 223 * @param buffer The buffer from which to parse. 224 * @param offset The offset into the buffer from which to parse. 225 * @return The boolean value of the bytes. 226 * @throws IllegalArgumentException if an invalid byte is detected. 227 */ 228 public static boolean parseBoolean(final byte[] buffer, final int offset) { 229 return buffer[offset] == 1; 230 } 231 232 // Helper method to generate the exception message 233 private static String exceptionMessage(byte[] buffer, final int offset, 234 final int length, int current, final byte currentByte) { 235 // default charset is good enough for an exception message, 236 // 237 // the alternative was to modify parseOctal and 238 // parseOctalOrBinary to receive the ZipEncoding of the 239 // archive (deprecating the existing public methods, of 240 // course) and dealing with the fact that ZipEncoding#decode 241 // can throw an IOException which parseOctal* doesn't declare 242 String string = new String(buffer, offset, length); 243 244 string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed 245 final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; 246 return s; 247 } 248 249 /** 250 * Parse an entry name from a buffer. 251 * Parsing stops when a NUL is found 252 * or the buffer length is reached. 253 * 254 * @param buffer The buffer from which to parse. 255 * @param offset The offset into the buffer from which to parse. 256 * @param length The maximum number of bytes to parse. 257 * @return The entry name. 258 */ 259 public static String parseName(byte[] buffer, final int offset, final int length) { 260 try { 261 return parseName(buffer, offset, length, DEFAULT_ENCODING); 262 } catch (IOException ex) { 263 try { 264 return parseName(buffer, offset, length, FALLBACK_ENCODING); 265 } catch (IOException ex2) { 266 // impossible 267 throw new RuntimeException(ex2); 268 } 269 } 270 } 271 272 /** 273 * Parse an entry name from a buffer. 274 * Parsing stops when a NUL is found 275 * or the buffer length is reached. 276 * 277 * @param buffer The buffer from which to parse. 278 * @param offset The offset into the buffer from which to parse. 279 * @param length The maximum number of bytes to parse. 280 * @param encoding name of the encoding to use for file names 281 * @since 1.4 282 * @return The entry name. 283 */ 284 public static String parseName(byte[] buffer, final int offset, 285 final int length, 286 final ZipEncoding encoding) 287 throws IOException { 288 289 int len = length; 290 for (; len > 0; len--) { 291 if (buffer[offset + len - 1] != 0) { 292 break; 293 } 294 } 295 if (len > 0) { 296 byte[] b = new byte[len]; 297 System.arraycopy(buffer, offset, b, 0, len); 298 return encoding.decode(b); 299 } 300 return ""; 301 } 302 303 /** 304 * Copy a name into a buffer. 305 * Copies characters from the name into the buffer 306 * starting at the specified offset. 307 * If the buffer is longer than the name, the buffer 308 * is filled with trailing NULs. 309 * If the name is longer than the buffer, 310 * the output is truncated. 311 * 312 * @param name The header name from which to copy the characters. 313 * @param buf The buffer where the name is to be stored. 314 * @param offset The starting offset into the buffer 315 * @param length The maximum number of header bytes to copy. 316 * @return The updated offset, i.e. offset + length 317 */ 318 public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { 319 try { 320 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 321 } catch (IOException ex) { 322 try { 323 return formatNameBytes(name, buf, offset, length, 324 FALLBACK_ENCODING); 325 } catch (IOException ex2) { 326 // impossible 327 throw new RuntimeException(ex2); 328 } 329 } 330 } 331 332 /** 333 * Copy a name into a buffer. 334 * Copies characters from the name into the buffer 335 * starting at the specified offset. 336 * If the buffer is longer than the name, the buffer 337 * is filled with trailing NULs. 338 * If the name is longer than the buffer, 339 * the output is truncated. 340 * 341 * @param name The header name from which to copy the characters. 342 * @param buf The buffer where the name is to be stored. 343 * @param offset The starting offset into the buffer 344 * @param length The maximum number of header bytes to copy. 345 * @param encoding name of the encoding to use for file names 346 * @since 1.4 347 * @return The updated offset, i.e. offset + length 348 */ 349 public static int formatNameBytes(String name, byte[] buf, final int offset, 350 final int length, 351 final ZipEncoding encoding) 352 throws IOException { 353 int len = name.length(); 354 ByteBuffer b = encoding.encode(name); 355 while (b.limit() > length && len > 0) { 356 b = encoding.encode(name.substring(0, --len)); 357 } 358 final int limit = b.limit() - b.position(); 359 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 360 361 // Pad any remaining output bytes with NUL 362 for (int i = limit; i < length; ++i) { 363 buf[offset + i] = 0; 364 } 365 366 return offset + length; 367 } 368 369 /** 370 * Fill buffer with unsigned octal number, padded with leading zeroes. 371 * 372 * @param value number to convert to octal - treated as unsigned 373 * @param buffer destination buffer 374 * @param offset starting offset in buffer 375 * @param length length of buffer to fill 376 * @throws IllegalArgumentException if the value will not fit in the buffer 377 */ 378 public static void formatUnsignedOctalString(final long value, byte[] buffer, 379 final int offset, final int length) { 380 int remaining = length; 381 remaining--; 382 if (value == 0) { 383 buffer[offset + remaining--] = (byte) '0'; 384 } else { 385 long val = value; 386 for (; remaining >= 0 && val != 0; --remaining) { 387 // CheckStyle:MagicNumber OFF 388 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 389 val = val >>> 3; 390 // CheckStyle:MagicNumber ON 391 } 392 if (val != 0){ 393 throw new IllegalArgumentException 394 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 395 } 396 } 397 398 for (; remaining >= 0; --remaining) { // leading zeros 399 buffer[offset + remaining] = (byte) '0'; 400 } 401 } 402 403 /** 404 * Write an octal integer into a buffer. 405 * 406 * Uses {@link #formatUnsignedOctalString} to format 407 * the value as an octal string with leading zeros. 408 * The converted number is followed by space and NUL 409 * 410 * @param value The value to write 411 * @param buf The buffer to receive the output 412 * @param offset The starting offset into the buffer 413 * @param length The size of the output buffer 414 * @return The updated offset, i.e offset+length 415 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 416 */ 417 public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) { 418 419 int idx=length-2; // For space and trailing null 420 formatUnsignedOctalString(value, buf, offset, idx); 421 422 buf[offset + idx++] = (byte) ' '; // Trailing space 423 buf[offset + idx] = 0; // Trailing null 424 425 return offset + length; 426 } 427 428 /** 429 * Write an octal long integer into a buffer. 430 * 431 * Uses {@link #formatUnsignedOctalString} to format 432 * the value as an octal string with leading zeros. 433 * The converted number is followed by a space. 434 * 435 * @param value The value to write as octal 436 * @param buf The destinationbuffer. 437 * @param offset The starting offset into the buffer. 438 * @param length The length of the buffer 439 * @return The updated offset 440 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 441 */ 442 public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) { 443 444 int idx=length-1; // For space 445 446 formatUnsignedOctalString(value, buf, offset, idx); 447 buf[offset + idx] = (byte) ' '; // Trailing space 448 449 return offset + length; 450 } 451 452 /** 453 * Write an long integer into a buffer as an octal string if this 454 * will fit, or as a binary number otherwise. 455 * 456 * Uses {@link #formatUnsignedOctalString} to format 457 * the value as an octal string with leading zeros. 458 * The converted number is followed by a space. 459 * 460 * @param value The value to write into the buffer. 461 * @param buf The destination buffer. 462 * @param offset The starting offset into the buffer. 463 * @param length The length of the buffer. 464 * @return The updated offset. 465 * @throws IllegalArgumentException if the value (and trailer) 466 * will not fit in the buffer. 467 * @since 1.4 468 */ 469 public static int formatLongOctalOrBinaryBytes( 470 final long value, byte[] buf, final int offset, final int length) { 471 472 // Check whether we are dealing with UID/GID or SIZE field 473 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 474 475 final boolean negative = value < 0; 476 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 477 return formatLongOctalBytes(value, buf, offset, length); 478 } 479 480 if (length < 9) { 481 formatLongBinary(value, buf, offset, length, negative); 482 } 483 formatBigIntegerBinary(value, buf, offset, length, negative); 484 485 buf[offset] = (byte) (negative ? 0xff : 0x80); 486 return offset + length; 487 } 488 489 private static void formatLongBinary(final long value, byte[] buf, 490 final int offset, final int length, 491 final boolean negative) { 492 final int bits = (length - 1) * 8; 493 final long max = 1l << bits; 494 long val = Math.abs(value); 495 if (val >= max) { 496 throw new IllegalArgumentException("Value " + value + 497 " is too large for " + length + " byte field."); 498 } 499 if (negative) { 500 val ^= max - 1; 501 val |= 0xff << bits; 502 val++; 503 } 504 for (int i = offset + length - 1; i >= offset; i--) { 505 buf[i] = (byte) val; 506 val >>= 8; 507 } 508 } 509 510 private static void formatBigIntegerBinary(final long value, byte[] buf, 511 final int offset, 512 final int length, 513 final boolean negative) { 514 BigInteger val = BigInteger.valueOf(value); 515 final byte[] b = val.toByteArray(); 516 final int len = b.length; 517 final int off = offset + length - len; 518 System.arraycopy(b, 0, buf, off, len); 519 final byte fill = (byte) (negative ? 0xff : 0); 520 for (int i = offset + 1; i < off; i++) { 521 buf[i] = fill; 522 } 523 } 524 525 /** 526 * Writes an octal value into a buffer. 527 * 528 * Uses {@link #formatUnsignedOctalString} to format 529 * the value as an octal string with leading zeros. 530 * The converted number is followed by NUL and then space. 531 * 532 * @param value The value to convert 533 * @param buf The destination buffer 534 * @param offset The starting offset into the buffer. 535 * @param length The size of the buffer. 536 * @return The updated value of offset, i.e. offset+length 537 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 538 */ 539 public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) { 540 541 int idx=length-2; // for NUL and space 542 formatUnsignedOctalString(value, buf, offset, idx); 543 544 buf[offset + idx++] = 0; // Trailing null 545 buf[offset + idx] = (byte) ' '; // Trailing space 546 547 return offset + length; 548 } 549 550 /** 551 * Compute the checksum of a tar entry header. 552 * 553 * @param buf The tar entry's header buffer. 554 * @return The computed checksum. 555 */ 556 public static long computeCheckSum(final byte[] buf) { 557 long sum = 0; 558 559 for (byte element : buf) { 560 sum += BYTE_MASK & element; 561 } 562 563 return sum; 564 } 565 566 /** 567 * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>: 568 * <blockquote> 569 * The checksum is calculated by taking the sum of the unsigned byte values 570 * of the header block with the eight checksum bytes taken to be ascii 571 * spaces (decimal value 32). It is stored as a six digit octal number with 572 * leading zeroes followed by a NUL and then a space. Various 573 * implementations do not adhere to this format. For better compatibility, 574 * ignore leading and trailing whitespace, and get the first six digits. In 575 * addition, some historic tar implementations treated bytes as signed. 576 * Implementations typically calculate the checksum both ways, and treat it 577 * as good if either the signed or unsigned sum matches the included 578 * checksum. 579 * </blockquote> 580 * <p> 581 * In addition there are 582 * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a> 583 * that seem to have parts of their header cleared to zero (no detectable 584 * magic bytes, etc.) but still have a reasonable-looking checksum field 585 * present. It looks like we can detect such cases reasonably well by 586 * checking whether the stored checksum is <em>greater than</em> the 587 * computed unsigned checksum. That check is unlikely to pass on some 588 * random file header, as it would need to have a valid sequence of 589 * octal digits in just the right place. 590 * <p> 591 * The return value of this method should be treated as a best-effort 592 * heuristic rather than an absolute and final truth. The checksum 593 * verification logic may well evolve over time as more special cases 594 * are encountered. 595 * 596 * @param header tar header 597 * @return whether the checksum is reasonably good 598 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 599 * @since 1.5 600 */ 601 public static boolean verifyCheckSum(byte[] header) { 602 long storedSum = 0; 603 long unsignedSum = 0; 604 long signedSum = 0; 605 606 int digits = 0; 607 for (int i = 0; i < header.length; i++) { 608 byte b = header[i]; 609 if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { 610 if ('0' <= b && b <= '7' && digits++ < 6) { 611 storedSum = storedSum * 8 + b - '0'; 612 } else if (digits > 0) { 613 digits = 6; // only look at the first octal digit sequence 614 } 615 b = ' '; 616 } 617 unsignedSum += 0xff & b; 618 signedSum += b; 619 } 620 621 return storedSum == unsignedSum || storedSum == signedSum 622 || storedSum > unsignedSum; // COMPRESS-177 623 } 624 625}