View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.fail;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  import java.util.ArrayList;
29  import java.util.Collection;
30  import java.util.List;
31  import java.util.Random;
32  
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.HConstants;
35  import org.apache.hadoop.hbase.KeyValue;
36  import org.apache.hadoop.hbase.KeyValue.Type;
37  import org.apache.hadoop.hbase.KeyValueUtil;
38  import org.apache.hadoop.hbase.testclassification.LargeTests;
39  import org.apache.hadoop.hbase.Tag;
40  import org.apache.hadoop.hbase.io.compress.Compression;
41  import org.apache.hadoop.hbase.io.hfile.HFileBlock.Writer.BufferGrabbingByteArrayOutputStream;
42  import org.apache.hadoop.hbase.io.hfile.HFileContext;
43  import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
46  import org.junit.Test;
47  import org.junit.experimental.categories.Category;
48  import org.junit.runner.RunWith;
49  import org.junit.runners.Parameterized;
50  import org.junit.runners.Parameterized.Parameters;
51  
52  /**
53   * Test all of the data block encoding algorithms for correctness. Most of the
54   * tests generate data that exercises different branches of the encoder code.
55   */
56  @Category(LargeTests.class)
57  @RunWith(Parameterized.class)
58  public class TestDataBlockEncoders {
59  
60    private static int NUMBER_OF_KV = 10000;
61    private static int NUM_RANDOM_SEEKS = 10000;
62  
63    private static int ENCODED_DATA_OFFSET = HConstants.HFILEBLOCK_HEADER_SIZE
64        + DataBlockEncoding.ID_SIZE;
65  
66    private RedundantKVGenerator generator = new RedundantKVGenerator();
67    private Random randomizer = new Random(42l);
68  
69    private final boolean includesMemstoreTS;
70    private final boolean includesTags;
71  
72    @Parameters
73    public static Collection<Object[]> parameters() {
74      return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
75    }
/**
 * Creates one parameterized instance of the test.
 *
 * @param includesMemstoreTS value later handed to {@code withIncludesMvcc} when contexts are built
 * @param includesTag value later handed to {@code withIncludesTags}; also selects tagged test data
 */
public TestDataBlockEncoders(boolean includesMemstoreTS, boolean includesTag) {
  this.includesTags = includesTag;
  this.includesMemstoreTS = includesMemstoreTS;
}
80    
81    private HFileBlockEncodingContext getEncodingContext(Compression.Algorithm algo,
82        DataBlockEncoding encoding) {
83      DataBlockEncoder encoder = encoding.getEncoder();
84      HFileContext meta = new HFileContextBuilder()
85                          .withHBaseCheckSum(false)
86                          .withIncludesMvcc(includesMemstoreTS)
87                          .withIncludesTags(includesTags)
88                          .withCompression(algo).build();
89      if (encoder != null) {
90        return encoder.newDataBlockEncodingContext(encoding,
91            HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
92      } else {
93        return new HFileBlockDefaultEncodingContext(encoding,
94            HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
95      }
96    }
97  
98    /**
99     * Test data block encoding of empty KeyValue.
100    * 
101    * @throws IOException
102    *           On test failure.
103    */
104   @Test
105   public void testEmptyKeyValues() throws IOException {
106     List<KeyValue> kvList = new ArrayList<KeyValue>();
107     byte[] row = new byte[0];
108     byte[] family = new byte[0];
109     byte[] qualifier = new byte[0];
110     byte[] value = new byte[0];
111     if (!includesTags) {
112       kvList.add(new KeyValue(row, family, qualifier, 0l, value));
113       kvList.add(new KeyValue(row, family, qualifier, 0l, value));
114     } else {
115       byte[] metaValue1 = Bytes.toBytes("metaValue1");
116       byte[] metaValue2 = Bytes.toBytes("metaValue2");
117       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
118           metaValue1) }));
119       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
120           metaValue2) }));
121     }
122     testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
123   }
124 
125   /**
126    * Test KeyValues with negative timestamp.
127    * 
128    * @throws IOException
129    *           On test failure.
130    */
131   @Test
132   public void testNegativeTimestamps() throws IOException {
133     List<KeyValue> kvList = new ArrayList<KeyValue>();
134     byte[] row = new byte[0];
135     byte[] family = new byte[0];
136     byte[] qualifier = new byte[0];
137     byte[] value = new byte[0];
138     if (includesTags) {
139       byte[] metaValue1 = Bytes.toBytes("metaValue1");
140       byte[] metaValue2 = Bytes.toBytes("metaValue2");
141       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
142           metaValue1) }));
143       kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
144           metaValue2) }));
145     } else {
146       kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
147       kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
148     }
149     testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
150   }
151 
152 
153   /**
154    * Test whether compression -> decompression gives the consistent results on
155    * pseudorandom sample.
156    * @throws IOException On test failure.
157    */
158   @Test
159   public void testExecutionOnSample() throws IOException {
160     List<KeyValue> kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
161     testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
162   }
163 
164   /**
165    * Test seeking while file is encoded.
166    */
167   @Test
168   public void testSeekingOnSample() throws IOException {
169     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
170 
171     // create all seekers
172     List<DataBlockEncoder.EncodedSeeker> encodedSeekers = 
173         new ArrayList<DataBlockEncoder.EncodedSeeker>();
174     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
175       DataBlockEncoder encoder = encoding.getEncoder();
176       if (encoder == null) {
177         continue;
178       }
179       ByteBuffer encodedBuffer = encodeKeyValues(encoding, sampleKv,
180           getEncodingContext(Compression.Algorithm.NONE, encoding));
181       HFileContext meta = new HFileContextBuilder()
182                           .withHBaseCheckSum(false)
183                           .withIncludesMvcc(includesMemstoreTS)
184                           .withIncludesTags(includesTags)
185                           .withCompression(Compression.Algorithm.NONE)
186                           .build();
187       DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
188           encoder.newDataBlockDecodingContext(meta));
189       seeker.setCurrentBuffer(encodedBuffer);
190       encodedSeekers.add(seeker);
191     }
192     // test it!
193     // try a few random seeks
194     for (boolean seekBefore : new boolean[] { false, true }) {
195       for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
196         int keyValueId;
197         if (!seekBefore) {
198           keyValueId = randomizer.nextInt(sampleKv.size());
199         } else {
200           keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1;
201         }
202 
203         KeyValue keyValue = sampleKv.get(keyValueId);
204         checkSeekingConsistency(encodedSeekers, seekBefore, keyValue);
205       }
206     }
207 
208     // check edge cases
209     checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
210     for (boolean seekBefore : new boolean[] { false, true }) {
211       checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
212       KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
213       KeyValue lastMidKv =KeyValueUtil.createLastOnRowCol(midKv);
214       checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
215     }
216   }
217 
218   static ByteBuffer encodeKeyValues(DataBlockEncoding encoding, List<KeyValue> kvs,
219       HFileBlockEncodingContext encodingContext) throws IOException {
220     DataBlockEncoder encoder = encoding.getEncoder();
221     ByteArrayOutputStream baos = new ByteArrayOutputStream();
222     baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
223     DataOutputStream dos = new DataOutputStream(baos);
224     encoder.startBlockEncoding(encodingContext, dos);
225     for (KeyValue kv : kvs) {
226       encoder.encode(kv, encodingContext, dos);
227     }
228     BufferGrabbingByteArrayOutputStream stream = new BufferGrabbingByteArrayOutputStream();
229     baos.writeTo(stream);
230     encoder.endBlockEncoding(encodingContext, dos, stream.getBuffer());
231     byte[] encodedData = new byte[baos.size() - ENCODED_DATA_OFFSET];
232     System.arraycopy(baos.toByteArray(), ENCODED_DATA_OFFSET, encodedData, 0, encodedData.length);
233     return ByteBuffer.wrap(encodedData);
234   }
235 
236   @Test
237   public void testNextOnSample() throws IOException {
238     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
239 
240     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
241       if (encoding.getEncoder() == null) {
242         continue;
243       }
244       DataBlockEncoder encoder = encoding.getEncoder();
245       ByteBuffer encodedBuffer = encodeKeyValues(encoding, sampleKv,
246           getEncodingContext(Compression.Algorithm.NONE, encoding));
247       HFileContext meta = new HFileContextBuilder()
248                           .withHBaseCheckSum(false)
249                           .withIncludesMvcc(includesMemstoreTS)
250                           .withIncludesTags(includesTags)
251                           .withCompression(Compression.Algorithm.NONE)
252                           .build();
253       DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
254           encoder.newDataBlockDecodingContext(meta));
255       seeker.setCurrentBuffer(encodedBuffer);
256       int i = 0;
257       do {
258         KeyValue expectedKeyValue = sampleKv.get(i);
259         ByteBuffer keyValue = seeker.getKeyValueBuffer();
260         if (0 != Bytes.compareTo(keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
261             expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
262             expectedKeyValue.getLength())) {
263 
264           int commonPrefix = 0;
265           byte[] left = keyValue.array();
266           byte[] right = expectedKeyValue.getBuffer();
267           int leftOff = keyValue.arrayOffset();
268           int rightOff = expectedKeyValue.getOffset();
269           int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
270           while (commonPrefix < length
271               && left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
272             commonPrefix++;
273           }
274 
275           fail(String.format("next() produces wrong results "
276               + "encoder: %s i: %d commonPrefix: %d" + "\n expected %s\n actual      %s", encoder
277               .toString(), i, commonPrefix, Bytes.toStringBinary(expectedKeyValue.getBuffer(),
278               expectedKeyValue.getOffset(), expectedKeyValue.getLength()), Bytes
279               .toStringBinary(keyValue)));
280         }
281         i++;
282       } while (seeker.next());
283     }
284   }
285 
286   /**
287    * Test whether the decompression of first key is implemented correctly.
288    * @throws IOException
289    */
290   @Test
291   public void testFirstKeyInBlockOnSample() throws IOException {
292     List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
293 
294     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
295       if (encoding.getEncoder() == null) {
296         continue;
297       }
298       DataBlockEncoder encoder = encoding.getEncoder();
299       ByteBuffer encodedBuffer = encodeKeyValues(encoding, sampleKv,
300           getEncodingContext(Compression.Algorithm.NONE, encoding));
301       ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
302       KeyValue firstKv = sampleKv.get(0);
303       if (0 != Bytes.compareTo(keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
304           firstKv.getBuffer(), firstKv.getKeyOffset(), firstKv.getKeyLength())) {
305 
306         int commonPrefix = 0;
307         int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
308         while (commonPrefix < length
309             && keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == firstKv.getBuffer()[firstKv
310                 .getKeyOffset() + commonPrefix]) {
311           commonPrefix++;
312         }
313         fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix));
314       }
315     }
316   }
317   
318   private void checkSeekingConsistency(List<DataBlockEncoder.EncodedSeeker> encodedSeekers,
319       boolean seekBefore, KeyValue keyValue) {
320     ByteBuffer expectedKeyValue = null;
321     ByteBuffer expectedKey = null;
322     ByteBuffer expectedValue = null;
323     for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
324       seeker.seekToKeyInBlock(keyValue, seekBefore);
325       seeker.rewind();
326 
327       ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
328       ByteBuffer actualKey = seeker.getKeyDeepCopy();
329       ByteBuffer actualValue = seeker.getValueShallowCopy();
330 
331       if (expectedKeyValue != null) {
332         assertEquals(expectedKeyValue, actualKeyValue);
333       } else {
334         expectedKeyValue = actualKeyValue;
335       }
336 
337       if (expectedKey != null) {
338         assertEquals(expectedKey, actualKey);
339       } else {
340         expectedKey = actualKey;
341       }
342 
343       if (expectedValue != null) {
344         assertEquals(expectedValue, actualValue);
345       } else {
346         expectedValue = actualValue;
347       }
348     }
349   }
350 
351   private void testEncodersOnDataset(List<KeyValue> kvList, boolean includesMemstoreTS,
352       boolean includesTags) throws IOException {
353     ByteBuffer unencodedDataBuf = RedundantKVGenerator.convertKvToByteBuffer(kvList,
354         includesMemstoreTS);
355     HFileContext fileContext = new HFileContextBuilder().withIncludesMvcc(includesMemstoreTS)
356         .withIncludesTags(includesTags).build();
357     for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
358       DataBlockEncoder encoder = encoding.getEncoder();
359       if (encoder == null) {
360         continue;
361       }
362       HFileBlockEncodingContext encodingContext = new HFileBlockDefaultEncodingContext(encoding,
363           HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
364 
365       ByteArrayOutputStream baos = new ByteArrayOutputStream();
366       baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
367       DataOutputStream dos = new DataOutputStream(baos);
368       encoder.startBlockEncoding(encodingContext, dos);
369       for (KeyValue kv : kvList) {
370         encoder.encode(kv, encodingContext, dos);
371       }
372       BufferGrabbingByteArrayOutputStream stream = new BufferGrabbingByteArrayOutputStream();
373       baos.writeTo(stream);
374       encoder.endBlockEncoding(encodingContext, dos, stream.getBuffer());
375       byte[] encodedData = baos.toByteArray();
376 
377       testAlgorithm(encodedData, unencodedDataBuf, encoder);
378     }
379   }
380   
381   @Test
382   public void testZeroByte() throws IOException {
383     List<KeyValue> kvList = new ArrayList<KeyValue>();
384     byte[] row = Bytes.toBytes("abcd");
385     byte[] family = new byte[] { 'f' };
386     byte[] qualifier0 = new byte[] { 'b' };
387     byte[] qualifier1 = new byte[] { 'c' };
388     byte[] value0 = new byte[] { 'd' };
389     byte[] value1 = new byte[] { 0x00 };
390     if (includesTags) {
391       kvList.add(new KeyValue(row, family, qualifier0, 0, value0, new Tag[] { new Tag((byte) 1,
392           "value1") }));
393       kvList.add(new KeyValue(row, family, qualifier1, 0, value1, new Tag[] { new Tag((byte) 1,
394           "value1") }));
395     } else {
396       kvList.add(new KeyValue(row, family, qualifier0, 0, Type.Put, value0));
397       kvList.add(new KeyValue(row, family, qualifier1, 0, Type.Put, value1));
398     }
399     testEncodersOnDataset(kvList, includesMemstoreTS, includesTags);
400   }
401 
402   private void testAlgorithm(byte[] encodedData, ByteBuffer unencodedDataBuf,
403       DataBlockEncoder encoder) throws IOException {
404     // decode
405     ByteArrayInputStream bais = new ByteArrayInputStream(encodedData, ENCODED_DATA_OFFSET,
406         encodedData.length - ENCODED_DATA_OFFSET);
407     DataInputStream dis = new DataInputStream(bais);
408     ByteBuffer actualDataset;
409     HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(false)
410         .withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTags)
411         .withCompression(Compression.Algorithm.NONE).build();
412     actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta));
413     actualDataset.rewind();
414 
415     // this is because in case of prefix tree the decoded stream will not have
416     // the
417     // mvcc in it.
418     assertEquals("Encoding -> decoding gives different results for " + encoder,
419         Bytes.toStringBinary(unencodedDataBuf), Bytes.toStringBinary(actualDataset));
420   }
421 }