/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test performance improvement of joined scanners optimization:
 * https://issues.apache.org/jira/browse/HBASE-5416
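 *
 * <p>With HBASE-5416, a scan can mark one column family as "essential" (via its filter) and
 * enable on-demand loading of the remaining ("joined") families, which are then read only for
 * rows that actually pass the filter. A minimal sketch of the pattern this test exercises
 * (see {@code runScanner} below):
 *
 * <pre>
 * Scan scan = new Scan();
 * SingleColumnValueFilter filter = new SingleColumnValueFilter(
 *     cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
 * filter.setFilterIfMissing(true);
 * scan.setFilter(filter);
 * scan.setLoadColumnFamiliesOnDemand(true); // enables the joined-scanner path
 * </pre>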
 */
@Category(LargeTests.class)
public class TestJoinedScanners {
  static final Log LOG = LogFactory.getLog(TestJoinedScanners.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static final String DIR = TEST_UTIL.getDataTestDir("TestJoinedScanners").toString();

  private static final byte[] cf_essential = Bytes.toBytes("essential");
  private static final byte[] cf_joined = Bytes.toBytes("joined");
  private static final byte[] col_name = Bytes.toBytes("a");
  private static final byte[] flag_yes = Bytes.toBytes("Y");
  private static final byte[] flag_no = Bytes.toBytes("N");

  private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
  private static int selectionRatio = 30;
  private static int valueWidth = 128 * 1024;

  @Test
  public void testJoinedScanners() throws Exception {
    String[] dataNodeHosts = new String[] { "host1", "host2", "host3" };
    int regionServersCount = 3;

    HBaseTestingUtility htu = new HBaseTestingUtility();

    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
    htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
    htu.getConfiguration().setInt("dfs.replication", 1);
    htu.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
    MiniHBaseCluster cluster = null;

    try {
      cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts);
      byte[][] families = { cf_essential, cf_joined };

      TableName tableName = TableName.valueOf(this.getClass().getSimpleName());
      HTableDescriptor desc = new HTableDescriptor(tableName);
      for (byte[] family : families) {
        HColumnDescriptor hcd = new HColumnDescriptor(family);
        hcd.setDataBlockEncoding(blockEncoding);
        desc.addFamily(hcd);
      }
      htu.getHBaseAdmin().createTable(desc);
      Table ht = new HTable(htu.getConfiguration(), tableName);

      long rows_to_insert = 1000;
      int insert_batch = 20;
      long time = System.nanoTime();
      Random rand = new Random(time);

      LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
        + Float.toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB");

      byte[] val_large = new byte[valueWidth];

      List<Put> puts = new ArrayList<Put>();

      for (long i = 0; i < rows_to_insert; i++) {
        Put put = new Put(Bytes.toBytes(Long.toString(i)));
        if (rand.nextInt(100) <= selectionRatio) {
          put.add(cf_essential, col_name, flag_yes);
        } else {
          put.add(cf_essential, col_name, flag_no);
        }
        put.add(cf_joined, col_name, val_large);
        puts.add(put);
        if (puts.size() >= insert_batch) {
          ht.put(puts);
          puts.clear();
        }
      }
      // Flush any remaining partial batch.
      if (puts.size() > 0) {
        ht.put(puts);
        puts.clear();
      }

      LOG.info("Data generated in "
        + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");

      // Alternate between the slow (load-everything) scan and the joined scan
      // so the two code paths are timed against the same data.
      boolean slow = true;
      for (int i = 0; i < 10; ++i) {
        runScanner(ht, slow);
        slow = !slow;
      }

      ht.close();
    } finally {
      if (cluster != null) {
        htu.shutdownMiniCluster();
      }
    }
  }

  private void runScanner(Table table, boolean slow) throws Exception {
    long time = System.nanoTime();
    Scan scan = new Scan();
    scan.addColumn(cf_essential, col_name);
    scan.addColumn(cf_joined, col_name);

    // Select only the rows whose essential column is flagged "Y"; rows missing
    // the column are filtered out as well.
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
        cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
    filter.setFilterIfMissing(true);
    scan.setFilter(filter);
    // The joined-scanner optimization under test: when enabled, the large
    // "joined" family is loaded only for rows that pass the filter.
    scan.setLoadColumnFamiliesOnDemand(!slow);

    ResultScanner result_scanner = table.getScanner(scan);
    Result res;
    long rows_count = 0;
    while ((res = result_scanner.next()) != null) {
      rows_count++;
    }

    double timeSec = (System.nanoTime() - time) / 1000000000.0;
    result_scanner.close();
    LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
      + " seconds, got " + Long.toString(rows_count) + " rows");
  }

  private static HRegion initHRegion(byte[] tableName, byte[] startKey, byte[] stopKey,
      String callingMethod, Configuration conf, byte[]... families)
      throws IOException {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
    for (byte[] family : families) {
      HColumnDescriptor hcd = new HColumnDescriptor(family);
      hcd.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
      htd.addFamily(hcd);
    }
    HRegionInfo info = new HRegionInfo(htd.getTableName(), startKey, stopKey, false);
    Path path = new Path(DIR + callingMethod);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
      if (!fs.delete(path, true)) {
        throw new IOException("Failed delete of " + path);
      }
    }
    return HRegion.createHRegion(info, path, conf, htd);
  }

  private static Options options = new Options();

  /**
   * Command line interface:
   * @param args the command line arguments (all options are optional)
   * @throws Exception if the cluster cannot be started or the test fails
   */
  public static void main(final String[] args) throws Exception {
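    // A sketch of a typical invocation (the classpath placeholder is hypothetical;
    // the option values shown simply restate the defaults above):
    //   java -cp <hbase-test-classpath> \
    //     org.apache.hadoop.hbase.regionserver.TestJoinedScanners -e FAST_DIFF -r 30 -w 131072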
    Option encodingOption = new Option("e", "blockEncoding", true,
      "Data block encoding; Default: FAST_DIFF");
    encodingOption.setRequired(false);
    options.addOption(encodingOption);

    Option ratioOption = new Option("r", "selectionRatio", true,
      "Ratio of selected rows using essential column family");
    ratioOption.setRequired(false);
    options.addOption(ratioOption);

    Option widthOption = new Option("w", "valueWidth", true,
      "Width of value for non-essential column family");
    widthOption.setRequired(false);
    options.addOption(widthOption);

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
    // Print usage when invoked without arguments; the test still runs with the defaults.
    if (args.length < 1) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("TestJoinedScanners", options, true);
    }

    if (cmd.hasOption("e")) {
      blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
    }
    if (cmd.hasOption("r")) {
      selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
    }
    if (cmd.hasOption("w")) {
      valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
    }
    // run the test
    TestJoinedScanners test = new TestJoinedScanners();
    test.testJoinedScanners();
  }
}