View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.zip.Checksum;
24  
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.util.ByteBufferUtils;
27  import org.apache.hadoop.hbase.util.Bytes;
28  import org.apache.hadoop.hbase.util.ChecksumType;
29  
30  /**
31   * Utility methods to compute and validate checksums.
32   */
33  @InterfaceAudience.Private
34  public class ChecksumUtil {
35  
36    /** This is used to reserve space in a byte buffer */
37    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
38  
39    /** 
40     * This is used by unit tests to make checksum failures throw an 
41     * exception instead of returning null. Returning a null value from 
42     * checksum validation will cause the higher layer to retry that 
43     * read with hdfs-level checksums. Instead, we would like checksum 
44     * failures to cause the entire unit test to fail.
45     */
46    private static boolean generateExceptions = false;
47  
48    /**
49     * Generates a checksum for all the data in indata. The checksum is
50     * written to outdata.
51     * @param indata input data stream
52     * @param startOffset starting offset in the indata stream from where to
53     *                    compute checkums from
54     * @param endOffset ending offset in the indata stream upto
55     *                   which checksums needs to be computed
56     * @param outdata the output buffer where checksum values are written
57     * @param outOffset the starting offset in the outdata where the
58     *                  checksum values are written
59     * @param checksumType type of checksum
60     * @param bytesPerChecksum number of bytes per checksum value
61     */
62    static void generateChecksums(byte[] indata,
63      int startOffset, int endOffset, 
64      byte[] outdata, int outOffset,
65      ChecksumType checksumType,
66      int bytesPerChecksum) throws IOException {
67  
68      if (checksumType == ChecksumType.NULL) {
69        return; // No checkums for this block.
70      }
71  
72      Checksum checksum = checksumType.getChecksumObject();
73      int bytesLeft = endOffset - startOffset;
74      int chunkNum = 0;
75  
76      while (bytesLeft > 0) {
77        // generate the checksum for one chunk
78        checksum.reset();
79        int count = Math.min(bytesLeft, bytesPerChecksum);
80        checksum.update(indata, startOffset, count);
81  
82        // write the checksum value to the output buffer.
83        int cksumValue = (int)checksum.getValue();
84        outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
85        chunkNum++;
86        startOffset += count;
87        bytesLeft -= count;
88      }
89    }
90  
91    /**
92     * Validates that the data in the specified HFileBlock matches the
93     * checksum.  Generates the checksum for the data and
94     * then validate that it matches the value stored in the header.
95     * If there is a checksum mismatch, then return false. Otherwise
96     * return true.
97     * The header is extracted from the specified HFileBlock while the
98     * data-to-be-verified is extracted from 'data'.
99     */
100   static boolean validateBlockChecksum(String pathName, HFileBlock block, 
101     byte[] data, int hdrSize) throws IOException {
102 
103     // If this is an older version of the block that does not have
104     // checksums, then return false indicating that checksum verification
105     // did not succeed. Actually, this methiod should never be called
106     // when the minorVersion is 0, thus this is a defensive check for a
107     // cannot-happen case. Since this is a cannot-happen case, it is
108     // better to return false to indicate a checksum validation failure.
109     if (!block.getHFileContext().isUseHBaseChecksum()) {
110       return false;
111     }
112 
113     // Get a checksum object based on the type of checksum that is
114     // set in the HFileBlock header. A ChecksumType.NULL indicates that 
115     // the caller is not interested in validating checksums, so we
116     // always return true.
117     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
118     if (cktype == ChecksumType.NULL) {
119       return true; // No checkums validations needed for this block.
120     }
121     Checksum checksumObject = cktype.getChecksumObject();
122     checksumObject.reset();
123 
124     // read in the stored value of the checksum size from the header.
125     int bytesPerChecksum = block.getBytesPerChecksum();
126 
127     // bytesPerChecksum is always larger than the size of the header
128     if (bytesPerChecksum < hdrSize) {
129       String msg = "Unsupported value of bytesPerChecksum. " +
130                    " Minimum is " + hdrSize + 
131                    " but the configured value is " + bytesPerChecksum;
132       HFile.LOG.warn(msg);
133       return false;   // cannot happen case, unable to verify checksum
134     }
135     // Extract the header and compute checksum for the header.
136     ByteBuffer hdr = block.getBufferWithHeader();
137     if (hdr.hasArray()) {
138       checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
139     } else {
140       checksumObject.update(ByteBufferUtils.toBytes(hdr, 0, hdrSize), 0, hdrSize);
141     }
142 
143     int off = hdrSize;
144     int consumed = hdrSize;
145     int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
146     int cksumOffset = block.getOnDiskDataSizeWithHeader();
147     
148     // validate each chunk
149     while (bytesLeft > 0) {
150       int thisChunkSize = bytesPerChecksum - consumed;
151       int count = Math.min(bytesLeft, thisChunkSize);
152       checksumObject.update(data, off, count);
153 
154       int storedChecksum = Bytes.toInt(data, cksumOffset);
155       if (storedChecksum != (int)checksumObject.getValue()) {
156         String msg = "File " + pathName +
157                      " Stored checksum value of " + storedChecksum +
158                      " at offset " + cksumOffset +
159                      " does not match computed checksum " +
160                      checksumObject.getValue() +
161                      ", total data size " + data.length +
162                      " Checksum data range offset " + off + " len " + count +
163                      HFileBlock.toStringHeader(block.getBufferReadOnly());
164         HFile.LOG.warn(msg);
165         if (generateExceptions) {
166           throw new IOException(msg); // this is only for unit tests
167         } else {
168           return false;               // checksum validation failure
169         }
170       }
171       cksumOffset += HFileBlock.CHECKSUM_SIZE;
172       bytesLeft -= count; 
173       off += count;
174       consumed = 0;
175       checksumObject.reset();
176     }
177     return true; // checksum is valid
178   }
179 
180   /**
181    * Returns the number of bytes needed to store the checksums for
182    * a specified data size
183    * @param datasize number of bytes of data
184    * @param bytesPerChecksum number of bytes in a checksum chunk
185    * @return The number of bytes needed to store the checksum values
186    */
187   static long numBytes(long datasize, int bytesPerChecksum) {
188     return numChunks(datasize, bytesPerChecksum) * 
189                      HFileBlock.CHECKSUM_SIZE;
190   }
191 
192   /**
193    * Returns the number of checksum chunks needed to store the checksums for
194    * a specified data size
195    * @param datasize number of bytes of data
196    * @param bytesPerChecksum number of bytes in a checksum chunk
197    * @return The number of checksum chunks
198    */
199   static long numChunks(long datasize, int bytesPerChecksum) {
200     long numChunks = datasize/bytesPerChecksum;
201     if (datasize % bytesPerChecksum != 0) {
202       numChunks++;
203     }
204     return numChunks;
205   }
206 
207   /**
208    * Write dummy checksums to the end of the specified bytes array
209    * to reserve space for writing checksums later
210    * @param baos OutputStream to write dummy checkum values
211    * @param numBytes Number of bytes of data for which dummy checksums
212    *                 need to be generated
213    * @param bytesPerChecksum Number of bytes per checksum value
214    */
215   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
216     int numBytes, int bytesPerChecksum) throws IOException {
217     long numChunks = numChunks(numBytes, bytesPerChecksum);
218     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
219     while (bytesLeft > 0) {
220       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
221       baos.write(DUMMY_VALUE, 0, (int)count);
222       bytesLeft -= count;
223     }
224   }
225 
226   /**
227    * Mechanism to throw an exception in case of hbase checksum
228    * failure. This is used by unit tests only.
229    * @param value Setting this to true will cause hbase checksum
230    *              verification failures to generate exceptions.
231    */
232   public static void generateExceptionForChecksumFailureForTest(boolean value) {
233     generateExceptions = value;
234   }
235 }
236