1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase;
20
21 import java.io.IOException;
22 import java.security.SecureRandom;
23 import java.util.Random;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.commons.math.random.RandomData;
28 import org.apache.commons.math.random.RandomDataImpl;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FileSystem;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.classification.InterfaceAudience;
33 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34 import org.apache.hadoop.hbase.io.crypto.Encryption;
35 import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
36 import org.apache.hadoop.hbase.io.crypto.aes.AES;
37 import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
38 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
39 import org.apache.hadoop.hbase.io.hfile.HFile;
40 import org.apache.hadoop.hbase.io.hfile.HFileContext;
41 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
42 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
43 import org.apache.hadoop.hbase.util.Bytes;
44
45
46
47
48 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
49 public class HFilePerformanceEvaluation {
/** Size in bytes of every value written by the write benchmark and checked by the readers. */
private static final int ROW_LENGTH = 10;
/** Number of rows written to each file and handed to every read benchmark. */
private static final int ROW_COUNT = 1000000;
/** Block size passed to the HFile context builder. */
private static final int RFILE_BLOCKSIZE = 8 * 1024;
/** Collects one line per benchmark; logged once all benchmarks have run. */
private static StringBuilder testSummary = new StringBuilder();

// This initializer is declared ahead of LOG on purpose: static initializers run in textual
// order, so the logging properties are in place before the logger is obtained.
static {
  final String logImplKey = "org.apache.commons.logging.Log";
  final String codecPoolLevelKey =
      "org.apache.commons.logging.simplelog.log.org.apache.hadoop.io.compress.CodecPool";
  System.setProperty(logImplKey, "org.apache.commons.logging.impl.SimpleLog");
  System.setProperty(codecPoolLevelKey, "WARN");
}

/** Logger shared by the evaluation and all nested benchmark classes. */
static final Log LOG = LogFactory.getLog(HFilePerformanceEvaluation.class);
65
66 static byte [] format(final int i) {
67 String v = Integer.toString(i);
68 return Bytes.toBytes("0000000000".substring(v.length()) + v);
69 }
70
71 static ImmutableBytesWritable format(final int i, ImmutableBytesWritable w) {
72 w.set(format(i));
73 return w;
74 }
75
76 static Cell createCell(final int i) {
77 return createCell(i, HConstants.EMPTY_BYTE_ARRAY);
78 }
79
80
81
82
83
84
85
86
87 static Cell createCell(final int i, final byte [] value) {
88 return createCell(format(i), value);
89 }
90
91 static Cell createCell(final byte [] keyRow) {
92 return CellUtil.createCell(keyRow);
93 }
94
95 static Cell createCell(final byte [] keyRow, final byte [] value) {
96 return CellUtil.createCell(keyRow, value);
97 }
98
99
100
101
102
103
104 private void runBenchmarks() throws Exception {
105 final Configuration conf = new Configuration();
106 final FileSystem fs = FileSystem.get(conf);
107 final Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
108
109
110 runWriteBenchmark(conf, fs, mf, "none", "none");
111 runReadBenchmark(conf, fs, mf, "none", "none");
112
113
114 runWriteBenchmark(conf, fs, mf, "gz", "none");
115 runReadBenchmark(conf, fs, mf, "gz", "none");
116
117
118 final Configuration aesconf = new Configuration();
119 aesconf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
120 aesconf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
121 aesconf.setInt("hfile.format.version", 3);
122 final FileSystem aesfs = FileSystem.get(aesconf);
123 final Path aesmf = aesfs.makeQualified(new Path("performanceevaluation.aes.mapfile"));
124
125
126 runWriteBenchmark(aesconf, aesfs, aesmf, "none", "aes");
127 runReadBenchmark(aesconf, aesfs, aesmf, "none", "aes");
128
129
130 runWriteBenchmark(aesconf, aesfs, aesmf, "gz", "aes");
131 runReadBenchmark(aesconf, aesfs, aesmf, "gz", "aes");
132
133
134 if (fs.exists(mf)) {
135 fs.delete(mf, true);
136 }
137 if (aesfs.exists(aesmf)) {
138 aesfs.delete(aesmf, true);
139 }
140
141
142 LOG.info("\n***************\n" + "Result Summary" + "\n***************\n");
143 LOG.info(testSummary.toString());
144
145 }
146
147
148
149
150
151
152
153
154
155
156 private void runWriteBenchmark(Configuration conf, FileSystem fs, Path mf, String codec,
157 String cipher) throws Exception {
158 if (fs.exists(mf)) {
159 fs.delete(mf, true);
160 }
161
162 runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT, codec, cipher),
163 ROW_COUNT, codec, cipher);
164
165 }
166
167
168
169
170
171
172
173
174
175 private void runReadBenchmark(final Configuration conf, final FileSystem fs, final Path mf,
176 final String codec, final String cipher) {
177 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
178 @Override
179 public void run() {
180 try {
181 runBenchmark(new UniformRandomSmallScan(conf, fs, mf, ROW_COUNT),
182 ROW_COUNT, codec, cipher);
183 } catch (Exception e) {
184 testSummary.append("UniformRandomSmallScan failed " + e.getMessage());
185 e.printStackTrace();
186 }
187 }
188 });
189
190 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
191 @Override
192 public void run() {
193 try {
194 runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
195 ROW_COUNT, codec, cipher);
196 } catch (Exception e) {
197 testSummary.append("UniformRandomReadBenchmark failed " + e.getMessage());
198 e.printStackTrace();
199 }
200 }
201 });
202
203 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
204 @Override
205 public void run() {
206 try {
207 runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
208 ROW_COUNT, codec, cipher);
209 } catch (Exception e) {
210 testSummary.append("GaussianRandomReadBenchmark failed " + e.getMessage());
211 e.printStackTrace();
212 }
213 }
214 });
215
216 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
217 @Override
218 public void run() {
219 try {
220 runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
221 ROW_COUNT, codec, cipher);
222 } catch (Exception e) {
223 testSummary.append("SequentialReadBenchmark failed " + e.getMessage());
224 e.printStackTrace();
225 }
226 }
227 });
228
229 }
230
231 protected void runBenchmark(RowOrientedBenchmark benchmark, int rowCount,
232 String codec, String cipher) throws Exception {
233 LOG.info("Running " + benchmark.getClass().getSimpleName() + " with codec[" +
234 codec + "] " + "cipher[" + cipher + "] for " + rowCount + " rows.");
235
236 long elapsedTime = benchmark.run();
237
238 LOG.info("Running " + benchmark.getClass().getSimpleName() + " with codec[" +
239 codec + "] " + "cipher[" + cipher + "] for " + rowCount + " rows took " +
240 elapsedTime + "ms.");
241
242
243 testSummary.append("Running ").append(benchmark.getClass().getSimpleName())
244 .append(" with codec[").append(codec).append("] cipher[").append(cipher)
245 .append("] for ").append(rowCount).append(" rows took ").append(elapsedTime)
246 .append("ms.").append("\n");
247 }
248
249 static abstract class RowOrientedBenchmark {
250
251 protected final Configuration conf;
252 protected final FileSystem fs;
253 protected final Path mf;
254 protected final int totalRows;
255 protected String codec = "none";
256 protected String cipher = "none";
257
258 public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
259 int totalRows, String codec, String cipher) {
260 this.conf = conf;
261 this.fs = fs;
262 this.mf = mf;
263 this.totalRows = totalRows;
264 this.codec = codec;
265 this.cipher = cipher;
266 }
267
268 public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
269 int totalRows) {
270 this.conf = conf;
271 this.fs = fs;
272 this.mf = mf;
273 this.totalRows = totalRows;
274 }
275
276 void setUp() throws Exception {
277
278 }
279
280 abstract void doRow(int i) throws Exception;
281
282 protected int getReportingPeriod() {
283 return this.totalRows / 10;
284 }
285
286 void tearDown() throws Exception {
287
288 }
289
290
291
292
293
294
295 long run() throws Exception {
296 long elapsedTime;
297 setUp();
298 long startTime = System.currentTimeMillis();
299 try {
300 for (int i = 0; i < totalRows; i++) {
301 if (i > 0 && i % getReportingPeriod() == 0) {
302 LOG.info("Processed " + i + " rows.");
303 }
304 doRow(i);
305 }
306 elapsedTime = System.currentTimeMillis() - startTime;
307 } finally {
308 tearDown();
309 }
310 return elapsedTime;
311 }
312
313 }
314
315 static class SequentialWriteBenchmark extends RowOrientedBenchmark {
316 protected HFile.Writer writer;
317 private Random random = new Random();
318 private byte[] bytes = new byte[ROW_LENGTH];
319
320 public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
321 int totalRows, String codec, String cipher) {
322 super(conf, fs, mf, totalRows, codec, cipher);
323 }
324
325 @Override
326 void setUp() throws Exception {
327
328 HFileContextBuilder builder = new HFileContextBuilder()
329 .withCompression(AbstractHFileWriter.compressionByName(codec))
330 .withBlockSize(RFILE_BLOCKSIZE);
331
332 if (cipher == "aes") {
333 byte[] cipherKey = new byte[AES.KEY_LENGTH];
334 new SecureRandom().nextBytes(cipherKey);
335 builder.withEncryptionContext(Encryption.newContext(conf)
336 .setCipher(Encryption.getCipher(conf, cipher))
337 .setKey(cipherKey));
338 } else if (!"none".equals(cipher)) {
339 throw new IOException("Cipher " + cipher + " not supported.");
340 }
341
342 HFileContext hFileContext = builder.build();
343
344 writer = HFile.getWriterFactoryNoCache(conf)
345 .withPath(fs, mf)
346 .withFileContext(hFileContext)
347 .withComparator(new KeyValue.RawBytesComparator())
348 .create();
349 }
350
351 @Override
352 void doRow(int i) throws Exception {
353 writer.append(createCell(i, generateValue()));
354 }
355
356 private byte[] generateValue() {
357 random.nextBytes(bytes);
358 return bytes;
359 }
360
361 @Override
362 protected int getReportingPeriod() {
363 return this.totalRows;
364 }
365
366 @Override
367 void tearDown() throws Exception {
368 writer.close();
369 }
370
371 }
372
373 static abstract class ReadBenchmark extends RowOrientedBenchmark {
374
375 protected HFile.Reader reader;
376
377 public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
378 int totalRows) {
379 super(conf, fs, mf, totalRows);
380 }
381
382 @Override
383 void setUp() throws Exception {
384 reader = HFile.createReader(this.fs, this.mf, new CacheConfig(this.conf), this.conf);
385 this.reader.loadFileInfo();
386 }
387
388 @Override
389 void tearDown() throws Exception {
390 reader.close();
391 }
392
393 }
394
395 static class SequentialReadBenchmark extends ReadBenchmark {
396 private HFileScanner scanner;
397
398 public SequentialReadBenchmark(Configuration conf, FileSystem fs,
399 Path mf, int totalRows) {
400 super(conf, fs, mf, totalRows);
401 }
402
403 @Override
404 void setUp() throws Exception {
405 super.setUp();
406 this.scanner = this.reader.getScanner(false, false);
407 this.scanner.seekTo();
408 }
409
410 @Override
411 void doRow(int i) throws Exception {
412 if (this.scanner.next()) {
413
414 Cell c = this.scanner.getKeyValue();
415 PerformanceEvaluationCommons.assertKey(format(i + 1), c);
416 PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
417 }
418 }
419
420 @Override
421 protected int getReportingPeriod() {
422 return this.totalRows;
423 }
424
425 }
426
427 static class UniformRandomReadBenchmark extends ReadBenchmark {
428
429 private Random random = new Random();
430
431 public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
432 Path mf, int totalRows) {
433 super(conf, fs, mf, totalRows);
434 }
435
436 @Override
437 void doRow(int i) throws Exception {
438 HFileScanner scanner = this.reader.getScanner(false, true);
439 byte [] b = getRandomRow();
440 if (scanner.seekTo(createCell(b)) < 0) {
441 LOG.info("Not able to seekTo " + new String(b));
442 return;
443 }
444
445 Cell c = scanner.getKeyValue();
446 PerformanceEvaluationCommons.assertKey(b, c);
447 PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
448 }
449
450 private byte [] getRandomRow() {
451 return format(random.nextInt(totalRows));
452 }
453 }
454
455 static class UniformRandomSmallScan extends ReadBenchmark {
456 private Random random = new Random();
457
458 public UniformRandomSmallScan(Configuration conf, FileSystem fs,
459 Path mf, int totalRows) {
460 super(conf, fs, mf, totalRows/10);
461 }
462
463 @Override
464 void doRow(int i) throws Exception {
465 HFileScanner scanner = this.reader.getScanner(false, false);
466 byte [] b = getRandomRow();
467
468 Cell c = createCell(b);
469 if (scanner.seekTo(c) != 0) {
470 LOG.info("Nonexistent row: " + new String(b));
471 return;
472 }
473
474 c = scanner.getKeyValue();
475
476
477 PerformanceEvaluationCommons.assertKey(b, c);
478 for (int ii = 0; ii < 30; ii++) {
479 if (!scanner.next()) {
480 LOG.info("NOTHING FOLLOWS");
481 return;
482 }
483 c = scanner.getKeyValue();
484 PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
485 }
486 }
487
488 private byte [] getRandomRow() {
489 return format(random.nextInt(totalRows));
490 }
491 }
492
493 static class GaussianRandomReadBenchmark extends ReadBenchmark {
494
495 private RandomData randomData = new RandomDataImpl();
496
497 public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
498 Path mf, int totalRows) {
499 super(conf, fs, mf, totalRows);
500 }
501
502 @Override
503 void doRow(int i) throws Exception {
504 HFileScanner scanner = this.reader.getScanner(false, true);
505 byte[] gaussianRandomRowBytes = getGaussianRandomRowBytes();
506 scanner.seekTo(createCell(gaussianRandomRowBytes));
507 for (int ii = 0; ii < 30; ii++) {
508 if (!scanner.next()) {
509 LOG.info("NOTHING FOLLOWS");
510 return;
511 }
512
513 scanner.getKeyValue();
514 }
515 }
516
517 private byte [] getGaussianRandomRowBytes() {
518 int r = (int) randomData.nextGaussian((double)totalRows / 2.0,
519 (double)totalRows / 10.0);
520
521 return format(Math.min(totalRows, Math.max(r,0)));
522 }
523 }
524
525
526
527
528
529
530 public static void main(String[] args) throws Exception {
531 new HFilePerformanceEvaluation().runBenchmarks();
532 }
533 }