001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
022import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
023
024import java.io.IOException;
025import java.math.BigInteger;
026import java.nio.ByteBuffer;
027import org.apache.commons.compress.archivers.zip.ZipEncoding;
028import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
029
030/**
031 * This class provides static utility methods to work with byte streams.
032 *
033 * @Immutable
034 */
035// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
036public class TarUtils {
037
038    private static final int BYTE_MASK = 255;
039
040    static final ZipEncoding DEFAULT_ENCODING =
041        ZipEncodingHelper.getZipEncoding(null);
042
043    /**
044     * Encapsulates the algorithms used up to Commons Compress 1.3 as
045     * ZipEncoding.
046     */
047    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
048            public boolean canEncode(String name) { return true; }
049
050            public ByteBuffer encode(String name) {
051                final int length = name.length();
052                byte[] buf = new byte[length];
053
054                // copy until end of input or output is reached.
055                for (int i = 0; i < length; ++i) {
056                    buf[i] = (byte) name.charAt(i);
057                }
058                return ByteBuffer.wrap(buf);
059            }
060
061            public String decode(byte[] buffer) {
062                final int length = buffer.length;
063                StringBuilder result = new StringBuilder(length);
064
065                for (byte b : buffer) {
066                    if (b == 0) { // Trailing null
067                        break;
068                    }
069                    result.append((char) (b & 0xFF)); // Allow for sign-extension
070                }
071
072                return result.toString();
073            }
074        };
075
076    /** Private constructor to prevent instantiation of this utility class. */
077    private TarUtils(){
078    }
079
080    /**
081     * Parse an octal string from a buffer.
082     *
083     * <p>Leading spaces are ignored.
084     * The buffer must contain a trailing space or NUL,
085     * and may contain an additional trailing space or NUL.</p>
086     *
087     * <p>The input buffer is allowed to contain all NULs,
088     * in which case the method returns 0L
089     * (this allows for missing fields).</p>
090     *
091     * <p>To work-around some tar implementations that insert a
092     * leading NUL this method returns 0 if it detects a leading NUL
093     * since Commons Compress 1.4.</p>
094     *
095     * @param buffer The buffer from which to parse.
096     * @param offset The offset into the buffer from which to parse.
097     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
098     * @return The long value of the octal string.
099     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
100     */
101    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
102        long    result = 0;
103        int     end = offset + length;
104        int     start = offset;
105
106        if (length < 2){
107            throw new IllegalArgumentException("Length "+length+" must be at least 2");
108        }
109
110        if (buffer[start] == 0) {
111            return 0L;
112        }
113
114        // Skip leading spaces
115        while (start < end){
116            if (buffer[start] == ' '){
117                start++;
118            } else {
119                break;
120            }
121        }
122
123        // Trim all trailing NULs and spaces.
124        // The ustar and POSIX tar specs require a trailing NUL or
125        // space but some implementations use the extra digit for big
126        // sizes/uids/gids ...
127        byte trailer = buffer[end - 1];
128        while (start < end && (trailer == 0 || trailer == ' ')) {
129            end--;
130            trailer = buffer[end - 1];
131        }
132
133        for ( ;start < end; start++) {
134            final byte currentByte = buffer[start];
135            // CheckStyle:MagicNumber OFF
136            if (currentByte < '0' || currentByte > '7'){
137                throw new IllegalArgumentException(
138                        exceptionMessage(buffer, offset, length, start, currentByte));
139            }
140            result = (result << 3) + (currentByte - '0'); // convert from ASCII
141            // CheckStyle:MagicNumber ON
142        }
143
144        return result;
145    }
146
147    /** 
148     * Compute the value contained in a byte buffer.  If the most
149     * significant bit of the first byte in the buffer is set, this
150     * bit is ignored and the rest of the buffer is interpreted as a
151     * binary number.  Otherwise, the buffer is interpreted as an
152     * octal number as per the parseOctal function above.
153     *
154     * @param buffer The buffer from which to parse.
155     * @param offset The offset into the buffer from which to parse.
156     * @param length The maximum number of bytes to parse.
157     * @return The long value of the octal or binary string.
158     * @throws IllegalArgumentException if the trailing space/NUL is
159     * missing or an invalid byte is detected in an octal number, or
160     * if a binary number would exceed the size of a signed long
161     * 64-bit integer.
162     * @since 1.4
163     */
164    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
165                                          final int length) {
166
167        if ((buffer[offset] & 0x80) == 0) {
168            return parseOctal(buffer, offset, length);
169        }
170        final boolean negative = buffer[offset] == (byte) 0xff;
171        if (length < 9) {
172            return parseBinaryLong(buffer, offset, length, negative);
173        }
174        return parseBinaryBigInteger(buffer, offset, length, negative);
175    }
176
177    private static long parseBinaryLong(final byte[] buffer, final int offset,
178                                        final int length,
179                                        final boolean negative) {
180        if (length >= 9) {
181            throw new IllegalArgumentException("At offset " + offset + ", "
182                                               + length + " byte binary number"
183                                               + " exceeds maximum signed long"
184                                               + " value");
185        }
186        long val = 0;
187        for (int i = 1; i < length; i++) {
188            val = (val << 8) + (buffer[offset + i] & 0xff);
189        }
190        if (negative) {
191            // 2's complement
192            val--;
193            val ^= (long) Math.pow(2, (length - 1) * 8) - 1;
194        }
195        return negative ? -val : val;
196    }
197
198    private static long parseBinaryBigInteger(final byte[] buffer,
199                                              final int offset,
200                                              final int length,
201                                              final boolean negative) {
202        byte[] remainder = new byte[length - 1];
203        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
204        BigInteger val = new BigInteger(remainder);
205        if (negative) {
206            // 2's complement
207            val = val.add(BigInteger.valueOf(-1)).not();
208        }
209        if (val.bitLength() > 63) {
210            throw new IllegalArgumentException("At offset " + offset + ", "
211                                               + length + " byte binary number"
212                                               + " exceeds maximum signed long"
213                                               + " value");
214        }
215        return negative ? -val.longValue() : val.longValue();
216    }
217
218    /**
219     * Parse a boolean byte from a buffer.
220     * Leading spaces and NUL are ignored.
221     * The buffer may contain trailing spaces or NULs.
222     *
223     * @param buffer The buffer from which to parse.
224     * @param offset The offset into the buffer from which to parse.
225     * @return The boolean value of the bytes.
226     * @throws IllegalArgumentException if an invalid byte is detected.
227     */
228    public static boolean parseBoolean(final byte[] buffer, final int offset) {
229        return buffer[offset] == 1;
230    }
231
232    // Helper method to generate the exception message
233    private static String exceptionMessage(byte[] buffer, final int offset,
234            final int length, int current, final byte currentByte) {
235        // default charset is good enough for an exception message,
236        //
237        // the alternative was to modify parseOctal and
238        // parseOctalOrBinary to receive the ZipEncoding of the
239        // archive (deprecating the existing public methods, of
240        // course) and dealing with the fact that ZipEncoding#decode
241        // can throw an IOException which parseOctal* doesn't declare
242        String string = new String(buffer, offset, length);
243
244        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
245        final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
246        return s;
247    }
248
249    /**
250     * Parse an entry name from a buffer.
251     * Parsing stops when a NUL is found
252     * or the buffer length is reached.
253     *
254     * @param buffer The buffer from which to parse.
255     * @param offset The offset into the buffer from which to parse.
256     * @param length The maximum number of bytes to parse.
257     * @return The entry name.
258     */
259    public static String parseName(byte[] buffer, final int offset, final int length) {
260        try {
261            return parseName(buffer, offset, length, DEFAULT_ENCODING);
262        } catch (IOException ex) {
263            try {
264                return parseName(buffer, offset, length, FALLBACK_ENCODING);
265            } catch (IOException ex2) {
266                // impossible
267                throw new RuntimeException(ex2);
268            }
269        }
270    }
271
272    /**
273     * Parse an entry name from a buffer.
274     * Parsing stops when a NUL is found
275     * or the buffer length is reached.
276     *
277     * @param buffer The buffer from which to parse.
278     * @param offset The offset into the buffer from which to parse.
279     * @param length The maximum number of bytes to parse.
280     * @param encoding name of the encoding to use for file names
281     * @since 1.4
282     * @return The entry name.
283     */
284    public static String parseName(byte[] buffer, final int offset,
285                                   final int length,
286                                   final ZipEncoding encoding)
287        throws IOException {
288
289        int len = length;
290        for (; len > 0; len--) {
291            if (buffer[offset + len - 1] != 0) {
292                break;
293            }
294        }
295        if (len > 0) {
296            byte[] b = new byte[len];
297            System.arraycopy(buffer, offset, b, 0, len);
298            return encoding.decode(b);
299        }
300        return "";
301    }
302
303    /**
304     * Copy a name into a buffer.
305     * Copies characters from the name into the buffer
306     * starting at the specified offset. 
307     * If the buffer is longer than the name, the buffer
308     * is filled with trailing NULs.
309     * If the name is longer than the buffer,
310     * the output is truncated.
311     *
312     * @param name The header name from which to copy the characters.
313     * @param buf The buffer where the name is to be stored.
314     * @param offset The starting offset into the buffer
315     * @param length The maximum number of header bytes to copy.
316     * @return The updated offset, i.e. offset + length
317     */
318    public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
319        try {
320            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
321        } catch (IOException ex) {
322            try {
323                return formatNameBytes(name, buf, offset, length,
324                                       FALLBACK_ENCODING);
325            } catch (IOException ex2) {
326                // impossible
327                throw new RuntimeException(ex2);
328            }
329        }
330    }
331
332    /**
333     * Copy a name into a buffer.
334     * Copies characters from the name into the buffer
335     * starting at the specified offset. 
336     * If the buffer is longer than the name, the buffer
337     * is filled with trailing NULs.
338     * If the name is longer than the buffer,
339     * the output is truncated.
340     *
341     * @param name The header name from which to copy the characters.
342     * @param buf The buffer where the name is to be stored.
343     * @param offset The starting offset into the buffer
344     * @param length The maximum number of header bytes to copy.
345     * @param encoding name of the encoding to use for file names
346     * @since 1.4
347     * @return The updated offset, i.e. offset + length
348     */
349    public static int formatNameBytes(String name, byte[] buf, final int offset,
350                                      final int length,
351                                      final ZipEncoding encoding)
352        throws IOException {
353        int len = name.length();
354        ByteBuffer b = encoding.encode(name);
355        while (b.limit() > length && len > 0) {
356            b = encoding.encode(name.substring(0, --len));
357        }
358        final int limit = b.limit() - b.position();
359        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
360
361        // Pad any remaining output bytes with NUL
362        for (int i = limit; i < length; ++i) {
363            buf[offset + i] = 0;
364        }
365
366        return offset + length;
367    }
368
369    /**
370     * Fill buffer with unsigned octal number, padded with leading zeroes.
371     * 
372     * @param value number to convert to octal - treated as unsigned
373     * @param buffer destination buffer
374     * @param offset starting offset in buffer
375     * @param length length of buffer to fill
376     * @throws IllegalArgumentException if the value will not fit in the buffer
377     */
378    public static void formatUnsignedOctalString(final long value, byte[] buffer,
379            final int offset, final int length) {
380        int remaining = length;
381        remaining--;
382        if (value == 0) {
383            buffer[offset + remaining--] = (byte) '0';
384        } else {
385            long val = value;
386            for (; remaining >= 0 && val != 0; --remaining) {
387                // CheckStyle:MagicNumber OFF
388                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
389                val = val >>> 3;
390                // CheckStyle:MagicNumber ON
391            }
392            if (val != 0){
393                throw new IllegalArgumentException
394                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
395            }
396        }
397
398        for (; remaining >= 0; --remaining) { // leading zeros
399            buffer[offset + remaining] = (byte) '0';
400        }
401    }
402
403    /**
404     * Write an octal integer into a buffer.
405     *
406     * Uses {@link #formatUnsignedOctalString} to format
407     * the value as an octal string with leading zeros.
408     * The converted number is followed by space and NUL
409     * 
410     * @param value The value to write
411     * @param buf The buffer to receive the output
412     * @param offset The starting offset into the buffer
413     * @param length The size of the output buffer
414     * @return The updated offset, i.e offset+length
415     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
416     */
417    public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) {
418
419        int idx=length-2; // For space and trailing null
420        formatUnsignedOctalString(value, buf, offset, idx);
421
422        buf[offset + idx++] = (byte) ' '; // Trailing space
423        buf[offset + idx]   = 0; // Trailing null
424
425        return offset + length;
426    }
427
428    /**
429     * Write an octal long integer into a buffer.
430     * 
431     * Uses {@link #formatUnsignedOctalString} to format
432     * the value as an octal string with leading zeros.
433     * The converted number is followed by a space.
434     * 
435     * @param value The value to write as octal
436     * @param buf The destinationbuffer.
437     * @param offset The starting offset into the buffer.
438     * @param length The length of the buffer
439     * @return The updated offset
440     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
441     */
442    public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) {
443
444        int idx=length-1; // For space
445
446        formatUnsignedOctalString(value, buf, offset, idx);
447        buf[offset + idx] = (byte) ' '; // Trailing space
448
449        return offset + length;
450    }
451
452    /**
453     * Write an long integer into a buffer as an octal string if this
454     * will fit, or as a binary number otherwise.
455     * 
456     * Uses {@link #formatUnsignedOctalString} to format
457     * the value as an octal string with leading zeros.
458     * The converted number is followed by a space.
459     * 
460     * @param value The value to write into the buffer.
461     * @param buf The destination buffer.
462     * @param offset The starting offset into the buffer.
463     * @param length The length of the buffer.
464     * @return The updated offset.
465     * @throws IllegalArgumentException if the value (and trailer)
466     * will not fit in the buffer.
467     * @since 1.4
468     */
469    public static int formatLongOctalOrBinaryBytes(
470        final long value, byte[] buf, final int offset, final int length) {
471
472        // Check whether we are dealing with UID/GID or SIZE field
473        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
474
475        final boolean negative = value < 0;
476        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
477            return formatLongOctalBytes(value, buf, offset, length);
478        }
479
480        if (length < 9) {
481            formatLongBinary(value, buf, offset, length, negative);
482        }
483        formatBigIntegerBinary(value, buf, offset, length, negative);
484
485        buf[offset] = (byte) (negative ? 0xff : 0x80);
486        return offset + length;
487    }
488
489    private static void formatLongBinary(final long value, byte[] buf,
490                                         final int offset, final int length,
491                                         final boolean negative) {
492        final int bits = (length - 1) * 8;
493        final long max = 1l << bits;
494        long val = Math.abs(value);
495        if (val >= max) {
496            throw new IllegalArgumentException("Value " + value +
497                " is too large for " + length + " byte field.");
498        }
499        if (negative) {
500            val ^= max - 1;
501            val |= 0xff << bits;
502            val++;
503        }
504        for (int i = offset + length - 1; i >= offset; i--) {
505            buf[i] = (byte) val;
506            val >>= 8;
507        }
508    }
509
510    private static void formatBigIntegerBinary(final long value, byte[] buf,
511                                               final int offset,
512                                               final int length,
513                                               final boolean negative) {
514        BigInteger val = BigInteger.valueOf(value);
515        final byte[] b = val.toByteArray();
516        final int len = b.length;
517        final int off = offset + length - len;
518        System.arraycopy(b, 0, buf, off, len);
519        final byte fill = (byte) (negative ? 0xff : 0);
520        for (int i = offset + 1; i < off; i++) {
521            buf[i] = fill;
522        }
523    }
524
525    /**
526     * Writes an octal value into a buffer.
527     * 
528     * Uses {@link #formatUnsignedOctalString} to format
529     * the value as an octal string with leading zeros.
530     * The converted number is followed by NUL and then space.
531     *
532     * @param value The value to convert
533     * @param buf The destination buffer
534     * @param offset The starting offset into the buffer.
535     * @param length The size of the buffer.
536     * @return The updated value of offset, i.e. offset+length
537     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
538     */
539    public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) {
540
541        int idx=length-2; // for NUL and space
542        formatUnsignedOctalString(value, buf, offset, idx);
543
544        buf[offset + idx++]   = 0; // Trailing null
545        buf[offset + idx]     = (byte) ' '; // Trailing space
546
547        return offset + length;
548    }
549
550    /**
551     * Compute the checksum of a tar entry header.
552     *
553     * @param buf The tar entry's header buffer.
554     * @return The computed checksum.
555     */
556    public static long computeCheckSum(final byte[] buf) {
557        long sum = 0;
558
559        for (byte element : buf) {
560            sum += BYTE_MASK & element;
561        }
562
563        return sum;
564    }
565
566    /**
567     * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
568     * <blockquote>
569     * The checksum is calculated by taking the sum of the unsigned byte values
570     * of the header block with the eight checksum bytes taken to be ascii
571     * spaces (decimal value 32). It is stored as a six digit octal number with
572     * leading zeroes followed by a NUL and then a space. Various
573     * implementations do not adhere to this format. For better compatibility,
574     * ignore leading and trailing whitespace, and get the first six digits. In
575     * addition, some historic tar implementations treated bytes as signed.
576     * Implementations typically calculate the checksum both ways, and treat it
577     * as good if either the signed or unsigned sum matches the included
578     * checksum.
579     * </blockquote>
580     * <p>
581     * In addition there are
582     * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a>
583     * that seem to have parts of their header cleared to zero (no detectable
584     * magic bytes, etc.) but still have a reasonable-looking checksum field
585     * present. It looks like we can detect such cases reasonably well by
586     * checking whether the stored checksum is <em>greater than</em> the
587     * computed unsigned checksum. That check is unlikely to pass on some
588     * random file header, as it would need to have a valid sequence of
589     * octal digits in just the right place.
590     * <p>
591     * The return value of this method should be treated as a best-effort
592     * heuristic rather than an absolute and final truth. The checksum
593     * verification logic may well evolve over time as more special cases
594     * are encountered.
595     *
596     * @param header tar header
597     * @return whether the checksum is reasonably good
598     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
599     * @since 1.5
600     */
601    public static boolean verifyCheckSum(byte[] header) {
602        long storedSum = 0;
603        long unsignedSum = 0;
604        long signedSum = 0;
605
606        int digits = 0;
607        for (int i = 0; i < header.length; i++) {
608            byte b = header[i];
609            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
610                if ('0' <= b && b <= '7' && digits++ < 6) {
611                    storedSum = storedSum * 8 + b - '0';
612                } else if (digits > 0) {
613                    digits = 6; // only look at the first octal digit sequence
614                }
615                b = ' ';
616            }
617            unsignedSum += 0xff & b;
618            signedSum += b;
619        }
620
621        return storedSum == unsignedSum || storedSum == signedSum
622                || storedSum > unsignedSum; // COMPRESS-177
623    }
624
625}