View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.regionserver;
18  
19  import java.io.IOException;
20  import java.util.ArrayList;
21  import java.util.List;
22  import java.util.Random;
23  
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.fs.Path;
26  import org.apache.hadoop.hbase.Cell;
27  import org.apache.hadoop.hbase.HBaseTestingUtility;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.KeyValue;
30  import org.apache.hadoop.hbase.KeyValueUtil;
31  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
32  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
33  import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
34  
35  /**
36   * Test seek performance for encoded data blocks. Read an HFile and do several
37   * random seeks.
38   */
39  public class EncodedSeekPerformanceTest {
40    private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
41    private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;
42    /** Default number of seeks which will be used in benchmark. */
43    public static int DEFAULT_NUMBER_OF_SEEKS = 10000;
44  
45    private final HBaseTestingUtility testingUtility = new HBaseTestingUtility();
46    private Configuration configuration = testingUtility.getConfiguration();
47    private CacheConfig cacheConf = new CacheConfig(configuration);
48    private Random randomizer;
49    private int numberOfSeeks;
50  
51    /** Use this benchmark with default options */
52    public EncodedSeekPerformanceTest() {
53      configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f);
54      randomizer = new Random(42l);
55      numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS;
56    }
57  
58    private List<Cell> prepareListOfTestSeeks(Path path) throws IOException {
59      List<Cell> allKeyValues = new ArrayList<Cell>();
60  
61      // read all of the key values
62      StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(),
63          path, configuration, cacheConf, BloomType.NONE);
64  
65      StoreFile.Reader reader = storeFile.createReader();
66      StoreFileScanner scanner = reader.getStoreFileScanner(true, false);
67      Cell current;
68  
69      scanner.seek(KeyValue.LOWESTKEY);
70      while (null != (current = scanner.next())) {
71        allKeyValues.add(current);
72      }
73  
74      storeFile.closeReader(cacheConf.shouldEvictOnClose());
75  
76      // pick seeks by random
77      List<Cell> seeks = new ArrayList<Cell>();
78      for (int i = 0; i < numberOfSeeks; ++i) {
79        Cell keyValue = allKeyValues.get(
80            randomizer.nextInt(allKeyValues.size()));
81        seeks.add(keyValue);
82      }
83  
84      clearBlockCache();
85  
86      return seeks;
87    }
88  
89    private void runTest(Path path, DataBlockEncoding blockEncoding,
90        List<Cell> seeks) throws IOException {
91      // read all of the key values
92      StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(),
93        path, configuration, cacheConf, BloomType.NONE);
94  
95      long totalSize = 0;
96  
97      StoreFile.Reader reader = storeFile.createReader();
98      StoreFileScanner scanner = reader.getStoreFileScanner(true, false);
99  
100     long startReadingTime = System.nanoTime();
101     Cell current;
102     scanner.seek(KeyValue.LOWESTKEY);
103     while (null != (current = scanner.next())) { // just iterate it!
104       if (KeyValueUtil.ensureKeyValue(current).getLength() < 0) {
105         throw new IOException("Negative KV size: " + current);
106       }
107       totalSize += KeyValueUtil.ensureKeyValue(current).getLength();
108     }
109     long finishReadingTime = System.nanoTime();
110 
111     // do seeks
112     long startSeeksTime = System.nanoTime();
113     for (Cell keyValue : seeks) {
114       scanner.seek(keyValue);
115       Cell toVerify = scanner.next();
116       if (!keyValue.equals(toVerify)) {
117         System.out.println(String.format("KeyValue doesn't match:\n" + "Orig key: %s\n"
118             + "Ret key:  %s", KeyValueUtil.ensureKeyValue(keyValue).getKeyString(), KeyValueUtil
119             .ensureKeyValue(toVerify).getKeyString()));
120         break;
121       }
122     }
123     long finishSeeksTime = System.nanoTime();
124     if (finishSeeksTime < startSeeksTime) {
125       throw new AssertionError("Finish time " + finishSeeksTime +
126           " is earlier than start time " + startSeeksTime);
127     }
128 
129     // write some stats
130     double readInMbPerSec = (totalSize * NANOSEC_IN_SEC) /
131         (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime));
132     double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) /
133         (finishSeeksTime - startSeeksTime);
134 
135     storeFile.closeReader(cacheConf.shouldEvictOnClose());
136     clearBlockCache();
137 
138     System.out.println(blockEncoding);
139     System.out.printf("  Read speed:       %8.2f (MB/s)\n", readInMbPerSec);
140     System.out.printf("  Seeks per second: %8.2f (#/s)\n", seeksPerSec);
141     System.out.printf("  Total KV size:    %d\n", totalSize);
142   }
143 
144   /**
145    * @param path Path to the HFile which will be used.
146    * @param encoders List of encoders which will be used for tests.
147    * @throws IOException if there is a bug while reading from disk
148    */
149   public void runTests(Path path, DataBlockEncoding[] encodings)
150       throws IOException {
151     List<Cell> seeks = prepareListOfTestSeeks(path);
152 
153     for (DataBlockEncoding blockEncoding : encodings) {
154       runTest(path, blockEncoding, seeks);
155     }
156   }
157 
158   /**
159    * Command line interface:
160    * @param args Takes one argument - file size.
161    * @throws IOException if there is a bug while reading from disk
162    */
163   public static void main(final String[] args) throws IOException {
164     if (args.length < 1) {
165       printUsage();
166       System.exit(-1);
167     }
168 
169     Path path = new Path(args[0]);
170 
171     // TODO, this test doesn't work as expected any more. Need to fix.
172     EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest();
173     utility.runTests(path, DataBlockEncoding.values());
174 
175     System.exit(0);
176   }
177 
178   private static void printUsage() {
179     System.out.println("Usage: one argument, name of the HFile");
180   }
181 
182   private void clearBlockCache() {
183     ((LruBlockCache) cacheConf.getBlockCache()).clearCache();
184   }
185 }