View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.TreeSet;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.Cell;
37  import org.apache.hadoop.hbase.HBaseTestCase;
38  import org.apache.hadoop.hbase.HBaseTestingUtility;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.KeyValue;
42  import org.apache.hadoop.hbase.KeyValueUtil;
43  import org.apache.hadoop.hbase.testclassification.SmallTests;
44  import org.apache.hadoop.hbase.TableName;
45  import org.apache.hadoop.hbase.client.Scan;
46  import org.apache.hadoop.hbase.io.HFileLink;
47  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
48  import org.apache.hadoop.hbase.io.hfile.BlockCache;
49  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
50  import org.apache.hadoop.hbase.io.hfile.CacheStats;
51  import org.apache.hadoop.hbase.io.hfile.HFileContext;
52  import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
53  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
54  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
55  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
56  import org.apache.hadoop.hbase.util.BloomFilterFactory;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.hbase.util.ChecksumType;
59  import org.apache.hadoop.hbase.util.FSUtils;
60  import org.junit.experimental.categories.Category;
61  import org.mockito.Mockito;
62  
63  import com.google.common.base.Joiner;
64  import com.google.common.collect.Iterables;
65  import com.google.common.collect.Lists;
66  
/**
 * Test HStoreFile
 */
@Category(SmallTests.class)
public class TestStoreFile extends HBaseTestCase {
  static final Log LOG = LogFactory.getLog(TestStoreFile.class);
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  // Cache config built from the shared test configuration; reused by every test below.
  private CacheConfig cacheConf =  new CacheConfig(TEST_UTIL.getConfiguration());
  // Root dir for files written directly (outside a region) by the bloom/reseek tests.
  private static String ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile").toString();
  // Checksum settings applied to the HFileContexts built in several tests.
  private static final ChecksumType CKTYPE = ChecksumType.CRC32;
  private static final int CKBYTES = 512;
  // Column family used when committing store files into a region.
  private static String TEST_FAMILY = "cf";
  @Override
  public void setUp() throws Exception {
    // No extra fixtures needed; HBaseTestCase sets up conf, fs and testDir.
    super.setUp();
  }
84  
  @Override
  public void tearDown() throws Exception {
    // No extra cleanup needed; HBaseTestCase tears down the test fixtures.
    super.tearDown();
  }
89  
90    /**
91     * Write a file and then assert that we can read from top and bottom halves
92     * using two HalfMapFiles.
93     * @throws Exception
94     */
95    public void testBasicHalfMapFile() throws Exception {
96      final HRegionInfo hri =
97          new HRegionInfo(TableName.valueOf("testBasicHalfMapFileTb"));
98      HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
99        conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri);
100 
101     HFileContext meta = new HFileContextBuilder().withBlockSize(2*1024).build();
102     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
103             .withFilePath(regionFs.createTempName())
104             .withFileContext(meta)
105             .build();
106     writeStoreFile(writer);
107 
108     Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
109     StoreFile sf = new StoreFile(this.fs, sfPath, conf, cacheConf,
110       BloomType.NONE);
111     checkHalfHFile(regionFs, sf);
112   }
113 
  // Convenience overload: fill the writer with the standard test data, using
  // the current test name as both column family and qualifier.
  private void writeStoreFile(final StoreFile.Writer writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(getName()), Bytes.toBytes(getName()));
  }
117 
  // Pick a split point roughly halfway through the two-char row key space
  // (FIRST_CHAR..LAST_CHAR) that writeStoreFile produces.
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR)/2, FIRST_CHAR};
120 
121   /*
122    * Writes HStoreKey and ImmutableBytes data to passed writer and
123    * then closes it.
124    * @param writer
125    * @throws IOException
126    */
127   public static void writeStoreFile(final StoreFile.Writer writer, byte[] fam, byte[] qualifier)
128   throws IOException {
129     long now = System.currentTimeMillis();
130     try {
131       for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
132         for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
133           byte[] b = new byte[] { (byte) d, (byte) e };
134           writer.append(new KeyValue(b, fam, qualifier, now, b));
135         }
136       }
137     } finally {
138       writer.close();
139     }
140   }
141 
142   /**
143    * Test that our mechanism of writing store files in one region to reference
144    * store files in other regions works.
145    * @throws IOException
146    */
147   public void testReference() throws IOException {
148     final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testReferenceTb"));
149     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
150       conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri);
151 
152     HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
153     // Make a store file and write data to it.
154     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
155             .withFilePath(regionFs.createTempName())
156             .withFileContext(meta)
157             .build();
158     writeStoreFile(writer);
159 
160     Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
161     StoreFile hsf = new StoreFile(this.fs, hsfPath, conf, cacheConf,
162       BloomType.NONE);
163     StoreFile.Reader reader = hsf.createReader();
164     // Split on a row, not in middle of row.  Midkey returned by reader
165     // may be in middle of row.  Create new one with empty column and
166     // timestamp.
167     KeyValue kv = KeyValue.createKeyValueFromKey(reader.midkey());
168     byte [] midRow = kv.getRow();
169     kv = KeyValue.createKeyValueFromKey(reader.getLastKey());
170     byte [] finalRow = kv.getRow();
171     // Make a reference
172     HRegionInfo splitHri = new HRegionInfo(hri.getTable(), null, midRow);
173     Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
174     StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf,
175       BloomType.NONE);
176     // Now confirm that I can read from the reference and that it only gets
177     // keys from top half of the file.
178     HFileScanner s = refHsf.createReader().getScanner(false, false);
179     for(boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
180       ByteBuffer bb = s.getKey();
181       kv = KeyValue.createKeyValueFromKey(bb);
182       if (first) {
183         assertTrue(Bytes.equals(kv.getRow(), midRow));
184         first = false;
185       }
186     }
187     assertTrue(Bytes.equals(kv.getRow(), finalRow));
188   }
189 
190   public void testHFileLink() throws IOException {
191     final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testHFileLinkTb"));
192     // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
193     Configuration testConf = new Configuration(this.conf);
194     FSUtils.setRootDir(testConf, this.testDir);
195     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
196       testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()), hri);
197     HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
198 
199     // Make a store file and write data to it.
200     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
201             .withFilePath(regionFs.createTempName())
202             .withFileContext(meta)
203             .build();
204     writeStoreFile(writer);
205 
206     Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
207     Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
208     HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
209     Path linkFilePath = new Path(dstPath,
210                   HFileLink.createHFileLinkName(hri, storeFilePath.getName()));
211 
212     // Try to open store file from link
213     StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath);
214     StoreFile hsf = new StoreFile(this.fs, storeFileInfo, testConf, cacheConf,
215       BloomType.NONE);
216     assertTrue(storeFileInfo.isLink());
217 
218     // Now confirm that I can read from the link
219     int count = 1;
220     HFileScanner s = hsf.createReader().getScanner(false, false);
221     s.seekTo();
222     while (s.next()) {
223       count++;
224     }
225     assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
226   }
227 
  /**
   * This test creates an hfile and then the dir structures and files to verify that references
   * to hfilelinks (created by snapshot clones) can be properly interpreted.
   *
   * Layout built: an original region with a store file; a "clone" region
   * holding an HFileLink to that file; then two daughter split references
   * (top and bottom) to the link. Both split references must be readable and
   * together must cover every row written.
   */
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    FSUtils.setRootDir(testConf, this.testDir);

    // adding legal table name chars to verify regex handles it.
    HRegionInfo hri = new HRegionInfo(TableName.valueOf("_original-evil-name"));
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
      testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs)
            .withFilePath(regionFs.createTempName())
            .withFileContext(meta)
            .build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    HRegionInfo hriClone = new HRegionInfo(TableName.valueOf("clone"));
    // NOTE(review): the clone region is created under the ORIGINAL table's
    // dir (hri.getTable()), not hriClone's — looks intentional for this
    // fixture layout but verify against the snapshot-clone layout it mimics.
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(
      testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()),
        hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath,
                  HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    HRegionInfo splitHriA = new HRegionInfo(hri.getTable(), null, SPLITKEY);
    HRegionInfo splitHriB = new HRegionInfo(hri.getTable(), SPLITKEY, null);
    StoreFile f = new StoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE);
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false);// bottom

    // OK test the thing
    FSUtils.logFileSystemState(fs, this.testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to a hfile link.  This code in StoreFile that handles this case.

    // Try to open store file from link
    StoreFile hsfA = new StoreFile(this.fs, pathA, testConf, cacheConf,
      BloomType.NONE);

    // Now confirm that I can read from the ref to link
    int count = 1;
    HFileScanner s = hsfA.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertTrue(count > 0); // read some rows here

    // Try to open store file from link
    StoreFile hsfB = new StoreFile(this.fs, pathB, testConf, cacheConf,
      BloomType.NONE);

    // Now confirm that I can read from the ref to link
    HFileScanner sB = hsfB.createReader().getScanner(false, false);
    sB.seekTo();

    //count++ as seekTo() will advance the scanner
    count++;
    while (sB.next()) {
      count++;
    }

    // read the rest of the rows
    // Top + bottom halves together must cover every row written.
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }
306 
  /**
   * Splits the given store file at its midkey into top and bottom reference
   * halves and verifies, via scanners, that each half returns exactly the
   * keys on its side of the split. Then exercises split points outside the
   * file's key range, where one half must be empty (null reference path).
   * Always closes the readers and deletes the backing file when done.
   */
  private void checkHalfHFile(final HRegionFileSystem regionFs, final StoreFile f)
      throws IOException {
    byte [] midkey = f.createReader().midkey();
    KeyValue midKV = KeyValue.createKeyValueFromKey(midkey);
    byte [] midRow = midKV.getRow();
    // Create top split.
    HRegionInfo topHri = new HRegionInfo(regionFs.getRegionInfo().getTable(),
        null, midRow);
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    HRegionInfo bottomHri = new HRegionInfo(regionFs.getRegionInfo().getTable(),
        midRow, null);
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    StoreFile.Reader top = new StoreFile(
      this.fs, topPath, conf, cacheConf, BloomType.NONE).createReader();
    StoreFile.Reader bottom = new StoreFile(
      this.fs, bottomPath, conf, cacheConf, BloomType.NONE).createReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey);
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test bottom half first.
      // Now test reading from the top.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
             (topScanner.isSeeked() && topScanner.next())) {
        key = topScanner.getKey();

        // Every key in the top half must compare >= midkey.
        if (topScanner.getReader().getComparator().compareFlatKey(key.array(),
          key.arrayOffset(), key.limit(), midkey, 0, midkey.length) < 0) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" +
              Bytes.toStringBinary(midkey));
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        previous = bottomScanner.getKey();
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          LOG.info("First in bottom: " +
            Bytes.toString(Bytes.toBytes(previous)));
        }
        // Every key in the bottom half must sort strictly before the midkey.
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Next test using a midkey that does not exist in the file.
      // First, do a key that is < than first key. Ensure splits behave
      // properly.
      byte [] badmidkey = Bytes.toBytes("  .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

      // Split point below the first key: the bottom half is empty, so no
      // reference file gets created.
      assertNull(bottomPath);

      top = new StoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE).createReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      while ((!topScanner.isSeeked() && topScanner.seekTo()) ||
          topScanner.next()) {
        key = topScanner.getKey();
        assertTrue(topScanner.getReader().getComparator().compareFlatKey(key.array(),
          key.arrayOffset(), key.limit(), badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          String tmp = Bytes.toString(keyKV.getRow());
          // writeStoreFile's first row is all 'a' chars ("aa").
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      String tmp = Bytes.toString(keyKV.getRow());
      // ...and the last row is all 'z' chars ("zz").
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is > than last key in file ('||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs,topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      // Split point above the last key: this time the top half is empty.
      assertNull(topPath);
      bottom = new StoreFile(this.fs, bottomPath, conf, cacheConf,
        BloomType.NONE).createReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) ||
          bottomScanner.next()) {
        key = bottomScanner.getKey();
        if (first) {
          first = false;
          keyKV = KeyValue.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRow());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValue.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      // NOTE(review): 'tmp' below still holds the FIRST row of this scan
      // (set in the 'first' branch above), not the last one — it is only
      // used for its length, which works because all rows are two chars
      // long; verify this is intended.
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(Bytes.toString(keyKV.getRow()).charAt(i) == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }
448 
  // Zero-padded 10-digit row-key format shared by the bloom filter tests.
  private static final String localFormatter = "%010d";
450 
451   private void bloomWriteRead(StoreFile.Writer writer, FileSystem fs) throws Exception {
452     float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
453     Path f = writer.getPath();
454     long now = System.currentTimeMillis();
455     for (int i = 0; i < 2000; i += 2) {
456       String row = String.format(localFormatter, i);
457       KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
458         "col".getBytes(), now, "value".getBytes());
459       writer.append(kv);
460     }
461     writer.close();
462 
463     StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, conf);
464     reader.loadFileInfo();
465     reader.loadBloomfilter();
466     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
467 
468     // check false positives rate
469     int falsePos = 0;
470     int falseNeg = 0;
471     for (int i = 0; i < 2000; i++) {
472       String row = String.format(localFormatter, i);
473       TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
474       columns.add("family:col".getBytes());
475 
476       Scan scan = new Scan(row.getBytes(),row.getBytes());
477       scan.addColumn("family".getBytes(), "family:col".getBytes());
478       boolean exists = scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
479       if (i % 2 == 0) {
480         if (!exists) falseNeg++;
481       } else {
482         if (exists) falsePos++;
483       }
484     }
485     reader.close(true); // evict because we are about to delete the file
486     fs.delete(f, true);
487     assertEquals("False negatives: " + falseNeg, 0, falseNeg);
488     int maxFalsePos = (int) (2 * 2000 * err);
489     assertTrue("Too many false positives: " + falsePos + " (err=" + err
490         + ", expected no more than " + maxFalsePos + ")",
491         falsePos <= maxFalsePos);
492   }
493   
  // Small HFile block size (8 KiB) used to force multiple blocks per file.
  private static final int BLOCKSIZE_SMALL = 8192;
495 
496   public void testBloomFilter() throws Exception {
497     FileSystem fs = FileSystem.getLocal(conf);
498     conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
499     conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
500 
501     // write the file
502     Path f = new Path(ROOT_DIR, getName());
503     HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
504                         .withChecksumType(CKTYPE)
505                         .withBytesPerCheckSum(CKBYTES).build();
506     // Make a store file and write data to it.
507     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
508             .withFilePath(f)
509             .withBloomType(BloomType.ROW)
510             .withMaxKeyCount(2000)
511             .withFileContext(meta)
512             .build();
513     bloomWriteRead(writer, fs);
514   }
515 
516   public void testDeleteFamilyBloomFilter() throws Exception {
517     FileSystem fs = FileSystem.getLocal(conf);
518     conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
519     conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
520     float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
521 
522     // write the file
523     Path f = new Path(ROOT_DIR, getName());
524 
525     HFileContext meta = new HFileContextBuilder()
526                         .withBlockSize(BLOCKSIZE_SMALL)
527                         .withChecksumType(CKTYPE)
528                         .withBytesPerCheckSum(CKBYTES).build();
529     // Make a store file and write data to it.
530     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
531             .withFilePath(f)
532             .withMaxKeyCount(2000)
533             .withFileContext(meta)
534             .build();
535 
536     // add delete family
537     long now = System.currentTimeMillis();
538     for (int i = 0; i < 2000; i += 2) {
539       String row = String.format(localFormatter, i);
540       KeyValue kv = new KeyValue(row.getBytes(), "family".getBytes(),
541           "col".getBytes(), now, KeyValue.Type.DeleteFamily, "value".getBytes());
542       writer.append(kv);
543     }
544     writer.close();
545 
546     StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, conf);
547     reader.loadFileInfo();
548     reader.loadBloomfilter();
549 
550     // check false positives rate
551     int falsePos = 0;
552     int falseNeg = 0;
553     for (int i = 0; i < 2000; i++) {
554       String row = String.format(localFormatter, i);
555       byte[] rowKey = Bytes.toBytes(row);
556       boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0,
557           rowKey.length);
558       if (i % 2 == 0) {
559         if (!exists)
560           falseNeg++;
561       } else {
562         if (exists)
563           falsePos++;
564       }
565     }
566     assertEquals(1000, reader.getDeleteFamilyCnt());
567     reader.close(true); // evict because we are about to delete the file
568     fs.delete(f, true);
569     assertEquals("False negatives: " + falseNeg, 0, falseNeg);
570     int maxFalsePos = (int) (2 * 2000 * err);
571     assertTrue("Too many false positives: " + falsePos + " (err=" + err
572         + ", expected no more than " + maxFalsePos, falsePos <= maxFalsePos);
573   }
574 
575   /**
576    * Test for HBASE-8012
577    */
578   public void testReseek() throws Exception {
579     // write the file
580     Path f = new Path(ROOT_DIR, getName());
581     HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
582     // Make a store file and write data to it.
583     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
584             .withFilePath(f)
585             .withFileContext(meta)
586             .build();
587 
588     writeStoreFile(writer);
589     writer.close();
590 
591     StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, conf);
592 
593     // Now do reseek with empty KV to position to the beginning of the file
594 
595     KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
596     StoreFileScanner s = reader.getStoreFileScanner(false, false);
597     s.reseek(k);
598 
599     assertNotNull("Intial reseek should position at the beginning of the file", s.peek());
600   }
601 
  /**
   * Exercises both ROWCOL and ROW bloom types: writes even rows x even
   * column qualifiers (two versions each), then probes every row/column
   * combination. False negatives must be zero; false positives must stay
   * within twice the expected error budget for each bloom type.
   */
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = {BloomType.ROWCOL, BloomType.ROW};
    int[] expKeys  = {rowCount*colCount, rowCount};
    // below line deserves commentary.  it is expected bloom false positives
    //  column = rowCount*2*colCount inserts
    //  row-level = only rowCount*2 inserts, but failures will be magnified by
    //              2nd for loop for every column (2*colCount)
    float[] expErr   = {2*rowCount*colCount*err, 2*rowCount*2*colCount*err};

    for (int x : new int[]{0,1}) {
      // write the file
      Path f = new Path(ROOT_DIR, getName() + x);
      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
          .withChecksumType(CKTYPE)
          .withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
      StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
              .withFilePath(f)
              .withBloomType(bt[x])
              .withMaxKeyCount(expKeys[x])
              .withFileContext(meta)
              .build();

      // Only even row/column indices are written; odd ones probe the filter.
      long now = System.currentTimeMillis();
      for (int i = 0; i < rowCount*2; i += 2) { // rows
        for (int j = 0; j < colCount*2; j += 2) {   // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k= 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(row.getBytes(),
              "family".getBytes(), ("col" + col).getBytes(),
                now-k, Bytes.toBytes((long)-1));
            writer.append(kv);
          }
        }
      }
      writer.close();

      StoreFile.Reader reader = new StoreFile.Reader(fs, f, cacheConf, conf);
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
      // The bloom filter must have been sized for exactly the keys written.
      assertEquals(expKeys[x], reader.generalBloomFilter.getKeyCount());

      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount*2; ++i) { // rows
        for (int j = 0; j < colCount*2; ++j) {   // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
          columns.add(("col" + col).getBytes());

          Scan scan = new Scan(row.getBytes(),row.getBytes());
          scan.addColumn("family".getBytes(), ("col"+col).getBytes());
          boolean exists =
              scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          // A ROW bloom cannot discriminate columns, so any column on a
          // written row counts as expected-present.
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) falseNeg++;
          } else {
            if (exists) falsePos++;
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2*expErr[x]);
    }
  }
689 
690   public void testSeqIdComparator() {
691     assertOrdering(StoreFile.Comparators.SEQ_ID,
692         mockStoreFile(true,  100,   1000, -1, "/foo/123"),
693         mockStoreFile(true,  100,   1000, -1, "/foo/124"),
694         mockStoreFile(true,  99,    1000, -1, "/foo/126"),
695         mockStoreFile(true,  98,    2000, -1, "/foo/126"),
696         mockStoreFile(false, 3453, -1,     1, "/foo/1"),
697         mockStoreFile(false, 2,    -1,     3, "/foo/2"),
698         mockStoreFile(false, 1000, -1,     5, "/foo/2"),
699         mockStoreFile(false, 76,   -1,     5, "/foo/3"));
700   }
701 
702   /**
703    * Assert that the given comparator orders the given storefiles in the
704    * same way that they're passed.
705    */
706   private void assertOrdering(Comparator<StoreFile> comparator, StoreFile ... sfs) {
707     ArrayList<StoreFile> sorted = Lists.newArrayList(sfs);
708     Collections.shuffle(sorted);
709     Collections.sort(sorted, comparator);
710     LOG.debug("sfs: " + Joiner.on(",").join(sfs));
711     LOG.debug("sorted: " + Joiner.on(",").join(sorted));
712     assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
713   }
714 
715   /**
716    * Create a mock StoreFile with the given attributes.
717    */
718   private StoreFile mockStoreFile(boolean bulkLoad,
719                                   long size,
720                                   long bulkTimestamp,
721                                   long seqId,
722                                   String path) {
723     StoreFile mock = Mockito.mock(StoreFile.class);
724     StoreFile.Reader reader = Mockito.mock(StoreFile.Reader.class);
725 
726     Mockito.doReturn(size).when(reader).length();
727 
728     Mockito.doReturn(reader).when(mock).getReader();
729     Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
730     Mockito.doReturn(bulkTimestamp).when(mock).getBulkLoadTimestamp();
731     Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
732     Mockito.doReturn(new Path(path)).when(mock).getPath();
733     String name = "mock storefile, bulkLoad=" + bulkLoad +
734       " bulkTimestamp=" + bulkTimestamp +
735       " seqId=" + seqId +
736       " path=" + path;
737     Mockito.doReturn(name).when(mock).toString();
738     return mock;
739   }
740 
741   /**
742    * Generate a list of KeyValues for testing based on given parameters
743    * @param timestamps
744    * @param numRows
745    * @param qualifier
746    * @param family
747    * @return
748    */
749   List<KeyValue> getKeyValueSet(long[] timestamps, int numRows,
750       byte[] qualifier, byte[] family) {
751     List<KeyValue> kvList = new ArrayList<KeyValue>();
752     for (int i=1;i<=numRows;i++) {
753       byte[] b = Bytes.toBytes(i) ;
754       LOG.info(Bytes.toString(b));
755       LOG.info(Bytes.toString(b));
756       for (long timestamp: timestamps)
757       {
758         kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
759       }
760     }
761     return kvList;
762   }
763 
764   /**
765    * Test to ensure correctness when using StoreFile with multiple timestamps
766    * @throws IOException
767    */
768   public void testMultipleTimestamps() throws IOException {
769     byte[] family = Bytes.toBytes("familyname");
770     byte[] qualifier = Bytes.toBytes("qualifier");
771     int numRows = 10;
772     long[] timestamps = new long[] {20,10,5,1};
773     Scan scan = new Scan();
774 
775     // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
776     Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname");
777     Path dir = new Path(storedir, "1234567890");
778     HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
779     // Make a store file and write data to it.
780     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
781             .withOutputDir(dir)
782             .withFileContext(meta)
783             .build();
784 
785     List<KeyValue> kvList = getKeyValueSet(timestamps,numRows,
786         family, qualifier);
787 
788     for (KeyValue kv : kvList) {
789       writer.append(kv);
790     }
791     writer.appendMetadata(0, false);
792     writer.close();
793 
794     StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
795       BloomType.NONE);
796     StoreFile.Reader reader = hsf.createReader();
797     StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
798     TreeSet<byte[]> columns = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
799     columns.add(qualifier);
800 
801     scan.setTimeRange(20, 100);
802     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
803 
804     scan.setTimeRange(1, 2);
805     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
806 
807     scan.setTimeRange(8, 10);
808     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
809 
810     scan.setTimeRange(7, 50);
811     assertTrue(scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
812 
813     // This test relies on the timestamp range optimization
814     scan.setTimeRange(27, 50);
815     assertTrue(!scanner.shouldUseScanner(scan, columns, Long.MIN_VALUE));
816   }
817 
  /**
   * Exercises cache-on-write and evict-on-close together against the shared
   * block cache: writes one file with cache-on-write off (expect misses on
   * read), one with it on (expect hits), verifies both files' blocks match
   * key-for-key and value-for-value, then checks that closing a reader
   * evicts its 3 blocks only when evict-on-close is enabled.
   *
   * NOTE(review): the hit/miss/evicted deltas below assume no other test
   * shares this block cache concurrently — the counters are read from the
   * global CacheStats.
   */
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(this.testDir, "7e0102"),"twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = new CacheConfig(conf).getBlockCache();
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFile.Writer writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
      BloomType.NONE);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    StoreFile.Reader reader = hsf.createReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    // Drain the scanner so every data block is pulled through the cache.
    while (scanner.next() != null);
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf,
      BloomType.NONE);

    // Read this file, we should see 3 hits
    reader = hsf.createReader();
    scanner = reader.getStoreFileScanner(true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    // Blocks were cached during the write above, so reads hit the cache.
    while (scanner.next() != null);
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
      BloomType.NONE);
    StoreFile.Reader readerOne = hsf.createReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = readerOne.getStoreFileScanner(true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
      BloomType.NONE);
    StoreFile.Reader readerTwo = hsf.createReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = readerTwo.getStoreFileScanner(true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    Cell kv1 = null;
    Cell kv2 = null;
    // Walk both files in lockstep, comparing each cell's key and value bytes.
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(
          keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(), 
          keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(
          kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
          kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    // Both files' blocks are now cached, so 3 + 3 additional hits.
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOff, conf, cacheConf,
      BloomType.NONE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted + 3, cs.getEvictedCount());
    startEvicted += 3;

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf);
    hsf = new StoreFile(this.fs, pathCowOn, conf, cacheConf,
      BloomType.NONE);
    reader = hsf.createReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }
938 
939   private Path splitStoreFile(final HRegionFileSystem regionFs, final HRegionInfo hri,
940       final String family, final StoreFile sf, final byte[] splitKey, boolean isTopRef)
941       throws IOException {
942     FileSystem fs = regionFs.getFileSystem();
943     Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null);
944     if (null == path) {
945       return null;
946     }
947     Path regionDir = regionFs.commitDaughterRegion(hri);
948     return new Path(new Path(regionDir, family), path.getName());
949   }
950 
951   private StoreFile.Writer writeStoreFile(Configuration conf,
952       CacheConfig cacheConf, Path path, int numBlocks)
953   throws IOException {
954     // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
955     int numKVs = 5 * numBlocks;
956     List<KeyValue> kvs = new ArrayList<KeyValue>(numKVs);
957     byte [] b = Bytes.toBytes("x");
958     int totalSize = 0;
959     for (int i=numKVs;i>0;i--) {
960       KeyValue kv = new KeyValue(b, b, b, i, b);
961       kvs.add(kv);
962       // kv has memstoreTS 0, which takes 1 byte to store.
963       totalSize += kv.getLength() + 1;
964     }
965     int blockSize = totalSize / numBlocks;
966     HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize)
967                         .withChecksumType(CKTYPE)
968                         .withBytesPerCheckSum(CKBYTES)
969                         .build();
970     // Make a store file and write data to it.
971     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
972             .withFilePath(path)
973             .withMaxKeyCount(2000)
974             .withFileContext(meta)
975             .build();
976     // We'll write N-1 KVs to ensure we don't write an extra block
977     kvs.remove(kvs.size()-1);
978     for (KeyValue kv : kvs) {
979       writer.append(kv);
980     }
981     writer.appendMetadata(0, false);
982     writer.close();
983     return writer;
984   }
985 
986   /**
987    * Check if data block encoding information is saved correctly in HFile's
988    * file info.
989    */
990   public void testDataBlockEncodingMetaData() throws IOException {
991     // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
992     Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
993     Path path = new Path(dir, "1234567890");
994 
995     DataBlockEncoding dataBlockEncoderAlgo =
996         DataBlockEncoding.FAST_DIFF;
997     HFileDataBlockEncoder dataBlockEncoder =
998         new HFileDataBlockEncoderImpl(
999             dataBlockEncoderAlgo);
1000     cacheConf = new CacheConfig(conf);
1001     HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
1002         .withChecksumType(CKTYPE)
1003         .withBytesPerCheckSum(CKBYTES)
1004         .withDataBlockEncoding(dataBlockEncoderAlgo)
1005         .build();
1006     // Make a store file and write data to it.
1007     StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs)
1008             .withFilePath(path)
1009             .withMaxKeyCount(2000)
1010             .withFileContext(meta)
1011             .build();
1012     writer.close();
1013 
1014     StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf,
1015       cacheConf, BloomType.NONE);
1016     StoreFile.Reader reader = storeFile.createReader();
1017 
1018     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
1019     byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
1020     assertEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
1021   }
1022 }
1023