/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package java.util.zip; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; import java.nio.ByteOrder; import java.util.Arrays; import libcore.io.Memory; import libcore.io.Streams; /** * The {@code GZIPInputStream} class is used to read data stored in the GZIP * format, reading and decompressing GZIP data from the underlying stream into * its buffer. * *
 * <p>Using {@code GZIPInputStream} is easier than {@link ZipInputStream}
 * because GZIP is only for compression, and is not a container for multiple files.
 * This code decompresses the data from a GZIP stream, similar to the {@code gunzip(1)} utility.
 * <pre>
 * InputStream is = ...
 * GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(is));
 * try {
 *     // Reading from 'zis' gets you the uncompressed bytes...
 *     processStream(zis);
 * } finally {
 *     zis.close();
 * }
 * </pre>
 *
 * <p>
Note that this class ignores all remaining data at the end of the last
 * GZIP member.
 */
public class GZIPInputStream extends InflaterInputStream {
    // Bits of the FLG header byte (header[3]) that announce optional header fields.
    private static final int FCOMMENT = 16;

    private static final int FEXTRA = 4;

    private static final int FHCRC = 2;

    private static final int FNAME = 8;

    /** Size of the fixed part of a member header, in bytes. */
    private static final int GZIP_HEADER_SIZE = 10;

    /** Size of a member trailer, in bytes: crc (4 bytes) + total out (4 bytes). */
    private static final int GZIP_TRAILER_SIZE = 8;

    /**
     * The magic header for the GZIP format.
     */
    public static final int GZIP_MAGIC = 0x8b1f;

    /**
     * The checksum algorithm used when handling uncompressed data.
     */
    protected CRC32 crc = new CRC32();

    /**
     * Indicates the end of the input stream.
     */
    protected boolean eos = false;

    /**
     * Construct a {@code GZIPInputStream} to read from GZIP data from the
     * underlying stream.
     *
     * @param is
     *            the {@code InputStream} to read data from.
     * @throws IOException
     *             if an {@code IOException} occurs.
     */
    public GZIPInputStream(InputStream is) throws IOException {
        this(is, BUF_SIZE);
    }

    /**
     * Construct a {@code GZIPInputStream} to read from GZIP data from the
     * underlying stream. Set the internal buffer size to {@code size}.
     *
     * <p>The first member header is read and validated eagerly; if that fails
     * the stream is closed before the exception is rethrown.
     *
     * @param is
     *            the {@code InputStream} to read data from.
     * @param size
     *            the internal read buffer size.
     * @throws IOException
     *             if an {@code IOException} occurs, or if the data does not
     *             start with the GZIP magic number.
     */
    public GZIPInputStream(InputStream is, int size) throws IOException {
        super(is, new Inflater(true), size);

        try {
            byte[] header = readHeader(is);
            final short magic = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
            if (magic != (short) GZIP_MAGIC) {
                throw new IOException(String.format("unknown format (magic number %x)", magic));
            }

            parseGzipHeader(is, header, crc, buf);
        } catch (IOException e) {
            close(); // release the inflater
            throw e;
        }
    }

    /**
     * Closes this stream and any underlying streams.
     */
    @Override
    public void close() throws IOException {
        eos = true;
        super.close();
    }

    /**
     * Reads up to {@code byteCount} uncompressed bytes into {@code buffer}
     * starting at {@code byteOffset}.
     *
     * <p>When the end of a member is reached its trailer is verified and, if
     * another member follows, decompression continues with that member.
     *
     * @return the number of bytes read, or -1 once the last member has been
     *         fully consumed.
     * @throws IOException
     *             if the stream is closed, the data is corrupt, or a trailer
     *             check ("CRC mismatch" / "Size mismatch") fails.
     */
    @Override
    public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
        if (closed) {
            throw new IOException("Stream is closed");
        }
        if (eos) {
            return -1;
        }
        Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount);

        while (true) {
            int bytesRead;
            try {
                bytesRead = super.read(buffer, byteOffset, byteCount);
            } finally {
                eos = eof; // update eos after every read(), even when it throws
            }

            if (bytesRead != -1) {
                crc.update(buffer, byteOffset, bytesRead);
            }

            if (eos) {
                verifyCrc();
                eos = maybeReadNextMember();
                if (!eos) {
                    // Another member follows: start it with clean state.
                    crc.reset();
                    inf.reset();
                    eof = false;
                    len = 0;

                    // NOTE(review): the superclass presumably can signal the end
                    // of a member with -1 and no data. Returning that -1 here
                    // would make callers stop even though more members follow,
                    // so read from the next member instead.
                    if (bytesRead == -1) {
                        continue;
                    }
                }
            }

            return bytesRead;
        }
    }

    /**
     * Attempts to read the header of a following GZIP member.
     *
     * @return {@code true} if there is no further member (end of stream, or
     *         trailing data that does not start with the GZIP magic number),
     *         {@code false} if another member header was parsed successfully.
     * @throws IOException
     *             if a header that starts with the magic number is malformed.
     */
    private boolean maybeReadNextMember() throws IOException {
        // If we have any unconsumed data in the inflater buffer, we have to
        // scan that first. The fact that we've reached here implies we've
        // successfully consumed the GZIP trailer.
        final int remaining = inf.getRemaining() - GZIP_TRAILER_SIZE;
        if (remaining > 0) {
            // NOTE: We make sure we create a pushback stream exactly once,
            // even if the input stream contains multiple members.
            //
            // The push back stream we create must therefore be able to contain
            // (worst case) the entire buffer even though there may be fewer bytes
            // remaining when it is first created.
            if (!(in instanceof PushbackInputStream)) {
                in = new PushbackInputStream(in, buf.length);
            }
            ((PushbackInputStream) in).unread(buf,
                    inf.getCurrentOffset() + GZIP_TRAILER_SIZE, remaining);
        }

        final byte[] buffer;
        try {
            buffer = readHeader(in);
        } catch (EOFException ignored) {
            // We've reached the end of the stream and there are no more members
            // to read. Note that we might also hit this if there are fewer than
            // GZIP_HEADER_SIZE bytes at the end of a member. We don't care
            // because we're specified to ignore all data at the end of the last
            // gzip record.
            return true;
        }

        final short magic = Memory.peekShort(buffer, 0, ByteOrder.LITTLE_ENDIAN);
        if (magic != (short) GZIP_MAGIC) {
            // Don't throw here because we've already read one valid member
            // from this stream.
            return true;
        }

        // We've encountered the gzip magic number, so we assume there's another
        // member in the stream. The checksum still holds the previous member's
        // data CRC at this point; clear it so that an FHCRC header check is
        // computed over the new header only.
        crc.reset();
        parseGzipHeader(in, buffer, crc, buf);
        return false;
    }

    /**
     * Reads the fixed {@code GZIP_HEADER_SIZE}-byte part of a member header.
     *
     * @throws IOException
     *             if the bytes cannot be read in full.
     */
    private static byte[] readHeader(InputStream in) throws IOException {
        byte[] header = new byte[GZIP_HEADER_SIZE];
        Streams.readFully(in, header, 0, header.length);
        return header;
    }

    /**
     * Consumes the optional header fields announced by the flag byte
     * {@code header[3]} and, when FHCRC is set, checks the header checksum.
     *
     * @param in the stream positioned directly after the fixed header.
     * @param header the fixed header; its first two bytes are reused as scratch space.
     * @param crc checksum used for the header check; it is reset after a
     *            successful FHCRC comparison and untouched when FHCRC is clear.
     * @param scratch buffer used to skip over the FEXTRA payload.
     * @throws IOException
     *             if the stream ends early or the header checksum does not match.
     */
    private static void parseGzipHeader(InputStream in, byte[] header,
            CRC32 crc, byte[] scratch) throws IOException {
        final byte flags = header[3];
        final boolean hcrc = (flags & FHCRC) != 0;
        if (hcrc) {
            crc.update(header, 0, header.length);
        }

        if ((flags & FEXTRA) != 0) {
            // Two-byte little-endian length followed by that many payload bytes.
            Streams.readFully(in, header, 0, 2);
            if (hcrc) {
                crc.update(header, 0, 2);
            }
            int length = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN) & 0xffff;
            while (length > 0) {
                int max = Math.min(length, scratch.length);
                int result = in.read(scratch, 0, max);
                if (result == -1) {
                    throw new EOFException();
                }
                if (hcrc) {
                    crc.update(scratch, 0, result);
                }
                length -= result;
            }
        }

        if ((flags & FNAME) != 0) {
            readZeroTerminated(in, crc, hcrc);
        }

        if ((flags & FCOMMENT) != 0) {
            readZeroTerminated(in, crc, hcrc);
        }

        if (hcrc) {
            // The stored value is compared with the low 16 bits of the header CRC.
            Streams.readFully(in, header, 0, 2);
            short crc16 = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
            if ((short) crc.getValue() != crc16) {
                throw new IOException("CRC mismatch");
            }
            crc.reset();
        }
    }

    /**
     * Reads the member trailer and compares it with the running checksum and
     * the inflater's output count.
     *
     * @throws IOException
     *             with "CRC mismatch" or "Size mismatch" if a check fails, or
     *             if the trailer cannot be read in full.
     */
    private void verifyCrc() throws IOException {
        // Get non-compressed bytes read by fill
        int size = inf.getRemaining();
        byte[] b = new byte[GZIP_TRAILER_SIZE];
        // Part (or all) of the trailer may already sit in the inflater's input
        // buffer; take that first and read only the rest from the stream.
        int copySize = Math.min(size, GZIP_TRAILER_SIZE);

        System.arraycopy(buf, len - size, b, 0, copySize);
        Streams.readFully(in, b, copySize, GZIP_TRAILER_SIZE - copySize);

        if (Memory.peekInt(b, 0, ByteOrder.LITTLE_ENDIAN) != (int) crc.getValue()) {
            throw new IOException("CRC mismatch");
        }
        if (Memory.peekInt(b, 4, ByteOrder.LITTLE_ENDIAN) != inf.getTotalOut()) {
            throw new IOException("Size mismatch");
        }
    }

    /**
     * Consumes bytes up to and including a terminating zero byte, feeding
     * them into {@code crc} when {@code hcrc} is set.
     *
     * @throws EOFException
     *             if the stream ends before a zero byte is seen.
     */
    private static void readZeroTerminated(InputStream in, CRC32 crc, boolean hcrc)
            throws IOException {
        int result;
        // TODO: Fix these single byte reads. This method is used to consume the
        // header FNAME & FCOMMENT which aren't widely used in gzip files.
        while ((result = in.read()) > 0) {
            if (hcrc) {
                crc.update(result);
            }
        }
        if (result == -1) {
            throw new EOFException();
        }
        // Add the zero
        if (hcrc) {
            crc.update(result);
        }
    }
}