1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24 import static org.junit.Assert.assertEquals;
25 import static org.junit.Assert.assertFalse;
26 import static org.junit.Assert.assertNotEquals;
27 import static org.junit.Assert.assertNotNull;
28 import static org.junit.Assert.assertTrue;
29 import static org.junit.Assert.fail;
30
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.Collection;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.LinkedList;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.NavigableMap;
41 import java.util.Set;
42 import java.util.UUID;
43 import java.util.concurrent.Callable;
44 import java.util.concurrent.CountDownLatch;
45 import java.util.concurrent.ExecutorService;
46 import java.util.concurrent.Executors;
47 import java.util.concurrent.Future;
48 import java.util.concurrent.ScheduledThreadPoolExecutor;
49 import java.util.concurrent.SynchronousQueue;
50 import java.util.concurrent.ThreadPoolExecutor;
51 import java.util.concurrent.TimeUnit;
52 import java.util.concurrent.atomic.AtomicBoolean;
53
54 import org.apache.commons.io.IOUtils;
55 import org.apache.commons.logging.Log;
56 import org.apache.commons.logging.LogFactory;
57 import org.apache.hadoop.conf.Configuration;
58 import org.apache.hadoop.fs.FileStatus;
59 import org.apache.hadoop.fs.FileSystem;
60 import org.apache.hadoop.fs.Path;
61 import org.apache.hadoop.hbase.ClusterStatus;
62 import org.apache.hadoop.hbase.HBaseTestingUtility;
63 import org.apache.hadoop.hbase.HColumnDescriptor;
64 import org.apache.hadoop.hbase.HConstants;
65 import org.apache.hadoop.hbase.HRegionInfo;
66 import org.apache.hadoop.hbase.HRegionLocation;
67 import org.apache.hadoop.hbase.HTableDescriptor;
68 import org.apache.hadoop.hbase.TableExistsException;
69 import org.apache.hadoop.hbase.io.HFileLink;
70 import org.apache.hadoop.hbase.io.hfile.HFile;
71 import org.apache.hadoop.hbase.io.hfile.HFileContext;
72 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
73 import org.apache.hadoop.hbase.testclassification.LargeTests;
74 import org.apache.hadoop.hbase.MiniHBaseCluster;
75 import org.apache.hadoop.hbase.ServerName;
76 import org.apache.hadoop.hbase.TableName;
77 import org.apache.hadoop.hbase.MetaTableAccessor;
78 import org.apache.hadoop.hbase.client.Admin;
79 import org.apache.hadoop.hbase.client.ClusterConnection;
80 import org.apache.hadoop.hbase.client.Connection;
81 import org.apache.hadoop.hbase.client.ConnectionFactory;
82 import org.apache.hadoop.hbase.client.Delete;
83 import org.apache.hadoop.hbase.client.Durability;
84 import org.apache.hadoop.hbase.client.Get;
85 import org.apache.hadoop.hbase.client.HBaseAdmin;
86 import org.apache.hadoop.hbase.client.HConnection;
87 import org.apache.hadoop.hbase.client.HTable;
88 import org.apache.hadoop.hbase.client.MetaScanner;
89 import org.apache.hadoop.hbase.client.Put;
90 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
91 import org.apache.hadoop.hbase.client.Result;
92 import org.apache.hadoop.hbase.client.ResultScanner;
93 import org.apache.hadoop.hbase.client.Scan;
94 import org.apache.hadoop.hbase.client.Table;
95 import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
96 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
97 import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
98 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
99 import org.apache.hadoop.hbase.io.hfile.TestHFile;
100 import org.apache.hadoop.hbase.master.AssignmentManager;
101 import org.apache.hadoop.hbase.master.HMaster;
102 import org.apache.hadoop.hbase.master.RegionState;
103 import org.apache.hadoop.hbase.master.RegionStates;
104 import org.apache.hadoop.hbase.master.TableLockManager;
105 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
106 import org.apache.hadoop.hbase.mob.MobFileName;
107 import org.apache.hadoop.hbase.mob.MobUtils;
108 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
109 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
110 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
111 import org.apache.hadoop.hbase.regionserver.HRegion;
112 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
113 import org.apache.hadoop.hbase.regionserver.HRegionServer;
114 import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
115 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
116 import org.apache.hadoop.hbase.testclassification.LargeTests;
117 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
118 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
119 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
120 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
121 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
122 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
123 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
124 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
125 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
126 import org.apache.zookeeper.KeeperException;
127 import org.junit.AfterClass;
128 import org.junit.Assert;
129 import org.junit.Before;
130 import org.junit.BeforeClass;
131 import org.junit.Ignore;
132 import org.junit.Test;
133 import org.junit.experimental.categories.Category;
134 import org.junit.rules.TestName;
135
136 import com.google.common.collect.Multimap;
137
138
139
140
141 @Category(LargeTests.class)
142 public class TestHBaseFsck {
143 static final int POOL_SIZE = 7;
144
145 final static Log LOG = LogFactory.getLog(TestHBaseFsck.class);
146 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
147 private final static Configuration conf = TEST_UTIL.getConfiguration();
148 private final static String FAM_STR = "fam";
149 private final static byte[] FAM = Bytes.toBytes(FAM_STR);
150 private final static int REGION_ONLINE_TIMEOUT = 800;
151 private static RegionStates regionStates;
152 private static ExecutorService tableExecutorService;
153 private static ScheduledThreadPoolExecutor hbfsckExecutorService;
154 private static ClusterConnection connection;
155 private static Admin admin;
156
157
158 private HTable tbl;
159 private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
160 Bytes.toBytes("B"), Bytes.toBytes("C") };
161
162 private final static byte[][] ROWKEYS= new byte[][] {
163 Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
164 Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
165
166 @BeforeClass
167 public static void setUpBeforeClass() throws Exception {
168 TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
169 MasterSyncObserver.class.getName());
170
171 conf.setInt("hbase.regionserver.handler.count", 2);
172 conf.setInt("hbase.regionserver.metahandler.count", 30);
173
174 conf.setInt("hbase.htable.threads.max", POOL_SIZE);
175 conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
176 conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
177 conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
178 conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
179 TEST_UTIL.startMiniCluster(3);
180
181 tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
182 new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
183
184 hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
185
186 AssignmentManager assignmentManager =
187 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
188 regionStates = assignmentManager.getRegionStates();
189
190 connection = (ClusterConnection) TEST_UTIL.getConnection();
191
192 admin = connection.getAdmin();
193 admin.setBalancerRunning(false, true);
194
195 TEST_UTIL.waitUntilAllSystemRegionsAssigned();
196
197 }
198
199 @AfterClass
200 public static void tearDownAfterClass() throws Exception {
201 tableExecutorService.shutdown();
202 hbfsckExecutorService.shutdown();
203 admin.close();
204 TEST_UTIL.shutdownMiniCluster();
205 }
206
207 @Before
208 public void setUp() {
209 EnvironmentEdgeManager.reset();
210 }
211
212 @Test (timeout=180000)
213 public void testHBaseFsck() throws Exception {
214 assertNoErrors(doFsck(conf, false));
215 TableName table = TableName.valueOf("tableBadMetaAssign");
216 HTableDescriptor desc = new HTableDescriptor(table);
217 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
218 desc.addFamily(hcd);
219 createTable(TEST_UTIL, desc, null);
220
221
222 assertNoErrors(doFsck(conf, false));
223
224
225
226 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
227 Scan scan = new Scan();
228 scan.setStartRow(Bytes.toBytes(table+",,"));
229 ResultScanner scanner = meta.getScanner(scan);
230 HRegionInfo hri = null;
231
232 Result res = scanner.next();
233 ServerName currServer =
234 ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
235 HConstants.SERVER_QUALIFIER));
236 long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
237 HConstants.STARTCODE_QUALIFIER));
238
239 for (JVMClusterUtil.RegionServerThread rs :
240 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
241
242 ServerName sn = rs.getRegionServer().getServerName();
243
244
245 if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
246 startCode != sn.getStartcode()) {
247 Put put = new Put(res.getRow());
248 put.setDurability(Durability.SKIP_WAL);
249 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
250 Bytes.toBytes(sn.getHostAndPort()));
251 put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
252 Bytes.toBytes(sn.getStartcode()));
253 meta.put(put);
254 hri = MetaTableAccessor.getHRegionInfo(res);
255 break;
256 }
257 }
258
259
260 assertErrors(doFsck(conf, true), new ERROR_CODE[]{
261 ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
262
263 TEST_UTIL.getHBaseCluster().getMaster()
264 .getAssignmentManager().waitForAssignment(hri);
265
266
267 assertNoErrors(doFsck(conf, false));
268
269
270 Table t = connection.getTable(table, tableExecutorService);
271 ResultScanner s = t.getScanner(new Scan());
272 s.close();
273 t.close();
274
275 scanner.close();
276 meta.close();
277 }
278
279 @Test(timeout=180000)
280 public void testFixAssignmentsWhenMETAinTransition() throws Exception {
281 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
282 admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
283 regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
284 new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
285 assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
286 HBaseFsck hbck = doFsck(conf, true);
287 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
288 ERROR_CODE.NULL_META_REGION });
289 assertNoErrors(doFsck(conf, false));
290 }
291
292
293
294
295 private HRegionInfo createRegion(final HTableDescriptor
296 htd, byte[] startKey, byte[] endKey)
297 throws IOException {
298 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
299 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
300 MetaTableAccessor.addRegionToMeta(meta, hri);
301 meta.close();
302 return hri;
303 }
304
305
306
307
308 private void dumpMeta(TableName tableName) throws IOException {
309 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
310 for (byte[] row : metaRows) {
311 LOG.info(Bytes.toString(row));
312 }
313 }
314
315
316
317
318
319 private void undeployRegion(Connection conn, ServerName sn,
320 HRegionInfo hri) throws IOException, InterruptedException {
321 try {
322 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
323 if (!hri.isMetaTable()) {
324 admin.offline(hri.getRegionName());
325 }
326 } catch (IOException ioe) {
327 LOG.warn("Got exception when attempting to offline region "
328 + Bytes.toString(hri.getRegionName()), ioe);
329 }
330 }
331
332
333
334
335
336
337 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
338 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
339 boolean hdfs) throws IOException, InterruptedException {
340 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false, HRegionInfo.DEFAULT_REPLICA_ID);
341 }
342
343
344
345
346
347
348
349
350
351 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
352 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
353 boolean hdfs, boolean regionInfoOnly, int replicaId)
354 throws IOException, InterruptedException {
355 LOG.info("** Before delete:");
356 dumpMeta(htd.getTableName());
357
358 List<HRegionLocation> locations = tbl.getAllRegionLocations();
359 for (HRegionLocation location : locations) {
360 HRegionInfo hri = location.getRegionInfo();
361 ServerName hsa = location.getServerName();
362 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
363 && Bytes.compareTo(hri.getEndKey(), endKey) == 0
364 && hri.getReplicaId() == replicaId) {
365
366 LOG.info("RegionName: " +hri.getRegionNameAsString());
367 byte[] deleteRow = hri.getRegionName();
368
369 if (unassign) {
370 LOG.info("Undeploying region " + hri + " from server " + hsa);
371 undeployRegion(connection, hsa, hri);
372 }
373
374 if (regionInfoOnly) {
375 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
376 Path rootDir = FSUtils.getRootDir(conf);
377 FileSystem fs = rootDir.getFileSystem(conf);
378 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
379 hri.getEncodedName());
380 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
381 fs.delete(hriPath, true);
382 }
383
384 if (hdfs) {
385 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
386 Path rootDir = FSUtils.getRootDir(conf);
387 FileSystem fs = rootDir.getFileSystem(conf);
388 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
389 hri.getEncodedName());
390 HBaseFsck.debugLsr(conf, p);
391 boolean success = fs.delete(p, true);
392 LOG.info("Deleted " + p + " sucessfully? " + success);
393 HBaseFsck.debugLsr(conf, p);
394 }
395
396 if (metaRow) {
397 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
398 Delete delete = new Delete(deleteRow);
399 meta.delete(delete);
400 }
401 }
402 }
403 LOG.info(hri.toString() + hsa.toString());
404 }
405
406 TEST_UTIL.getMetaTableRows(htd.getTableName());
407 LOG.info("*** After delete:");
408 dumpMeta(htd.getTableName());
409 }
410
411
412
413
414
415
416
417
418
419
420 void setupTable(TableName tablename) throws Exception {
421 setupTableWithRegionReplica(tablename, 1);
422 }
423
424
425
426
427
428
429
430
431
432
433 void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
434 HTableDescriptor desc = new HTableDescriptor(tablename);
435 desc.setRegionReplication(replicaCount);
436 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
437 desc.addFamily(hcd);
438 createTable(TEST_UTIL, desc, SPLITS);
439
440 tbl = (HTable) connection.getTable(tablename, tableExecutorService);
441 List<Put> puts = new ArrayList<Put>();
442 for (byte[] row : ROWKEYS) {
443 Put p = new Put(row);
444 p.add(FAM, Bytes.toBytes("val"), row);
445 puts.add(p);
446 }
447 tbl.put(puts);
448 tbl.flushCommits();
449 }
450
451
452
453
454
455
456
457 void setupMobTable(TableName tablename) throws Exception {
458 HTableDescriptor desc = new HTableDescriptor(tablename);
459 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
460 hcd.setMobEnabled(true);
461 hcd.setMobThreshold(0);
462 desc.addFamily(hcd);
463 createTable(TEST_UTIL, desc, SPLITS);
464
465 tbl = (HTable) connection.getTable(tablename, tableExecutorService);
466 List<Put> puts = new ArrayList<Put>();
467 for (byte[] row : ROWKEYS) {
468 Put p = new Put(row);
469 p.add(FAM, Bytes.toBytes("val"), row);
470 puts.add(p);
471 }
472 tbl.put(puts);
473 tbl.flushCommits();
474 }
475
476
477
478
479 int countRows() throws IOException {
480 Scan s = new Scan();
481 ResultScanner rs = tbl.getScanner(s);
482 int i = 0;
483 while(rs.next() !=null) {
484 i++;
485 }
486 return i;
487 }
488
489
490
491
492 int countRows(byte[] start, byte[] end) throws IOException {
493 Scan s = new Scan(start, end);
494 ResultScanner rs = tbl.getScanner(s);
495 int i = 0;
496 while (rs.next() != null) {
497 i++;
498 }
499 return i;
500 }
501
502
503
504
505
506
507
508 void cleanupTable(TableName tablename) throws Exception {
509 if (tbl != null) {
510 tbl.close();
511 tbl = null;
512 }
513
514 ((ClusterConnection) connection).clearRegionCache();
515 deleteTable(TEST_UTIL, tablename);
516 }
517
518
519
520
521 @Test (timeout=180000)
522 public void testHBaseFsckClean() throws Exception {
523 assertNoErrors(doFsck(conf, false));
524 TableName table = TableName.valueOf("tableClean");
525 try {
526 HBaseFsck hbck = doFsck(conf, false);
527 assertNoErrors(hbck);
528
529 setupTable(table);
530 assertEquals(ROWKEYS.length, countRows());
531
532
533 hbck = doFsck(conf, false);
534 assertNoErrors(hbck);
535 assertEquals(0, hbck.getOverlapGroups(table).size());
536 assertEquals(ROWKEYS.length, countRows());
537 } finally {
538 cleanupTable(table);
539 }
540 }
541
542
543
544
545 @Test (timeout=180000)
546 public void testHbckThreadpooling() throws Exception {
547 TableName table =
548 TableName.valueOf("tableDupeStartKey");
549 try {
550
551 setupTable(table);
552
553
554 Configuration newconf = new Configuration(conf);
555 newconf.setInt("hbasefsck.numthreads", 1);
556 assertNoErrors(doFsck(newconf, false));
557
558
559 } finally {
560 cleanupTable(table);
561 }
562 }
563
564 @Test (timeout=180000)
565 public void testHbckFixOrphanTable() throws Exception {
566 TableName table = TableName.valueOf("tableInfo");
567 FileSystem fs = null;
568 Path tableinfo = null;
569 try {
570 setupTable(table);
571
572 Path hbaseTableDir = FSUtils.getTableDir(
573 FSUtils.getRootDir(conf), table);
574 fs = hbaseTableDir.getFileSystem(conf);
575 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
576 tableinfo = status.getPath();
577 fs.rename(tableinfo, new Path("/.tableinfo"));
578
579
580 HBaseFsck hbck = doFsck(conf, false);
581 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
582
583
584 hbck = doFsck(conf, true);
585 assertNoErrors(hbck);
586 status = null;
587 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
588 assertNotNull(status);
589
590 HTableDescriptor htd = admin.getTableDescriptor(table);
591 htd.setValue("NOT_DEFAULT", "true");
592 admin.disableTable(table);
593 admin.modifyTable(table, htd);
594 admin.enableTable(table);
595 fs.delete(status.getPath(), true);
596
597
598 htd = admin.getTableDescriptor(table);
599 hbck = doFsck(conf, true);
600 assertNoErrors(hbck);
601 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
602 assertNotNull(status);
603 htd = admin.getTableDescriptor(table);
604 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
605 } finally {
606 fs.rename(new Path("/.tableinfo"), tableinfo);
607 cleanupTable(table);
608 }
609 }
610
611
612
613
614
615
616 @Test (timeout=180000)
617 public void testParallelHbck() throws Exception {
618 final ExecutorService service;
619 final Future<HBaseFsck> hbck1,hbck2;
620
621 class RunHbck implements Callable<HBaseFsck>{
622 boolean fail = true;
623 @Override
624 public HBaseFsck call(){
625 Configuration c = new Configuration(conf);
626 c.setInt("hbase.hbck.lockfile.attempts", 1);
627
628
629 c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
630 try{
631 return doFsck(c, false);
632 } catch(Exception e){
633 if (e.getMessage().contains("Duplicate hbck")) {
634 fail = false;
635 }
636 }
637
638 if (fail) fail();
639 return null;
640 }
641 }
642 service = Executors.newFixedThreadPool(2);
643 hbck1 = service.submit(new RunHbck());
644 hbck2 = service.submit(new RunHbck());
645 service.shutdown();
646
647 service.awaitTermination(15, TimeUnit.SECONDS);
648 HBaseFsck h1 = hbck1.get();
649 HBaseFsck h2 = hbck2.get();
650
651 assert(h1 == null || h2 == null);
652 if (h1 != null) {
653 assert(h1.getRetCode() >= 0);
654 }
655 if (h2 != null) {
656 assert(h2.getRetCode() >= 0);
657 }
658 }
659
660
661
662
663
664
665
666 @Test (timeout=180000)
667 public void testParallelWithRetriesHbck() throws Exception {
668 final ExecutorService service;
669 final Future<HBaseFsck> hbck1,hbck2;
670
671
672
673
674
675
676
677 final int timeoutInSeconds = 80;
678 final int sleepIntervalInMilliseconds = 200;
679 final int maxSleepTimeInMilliseconds = 6000;
680 final int maxRetryAttempts = 15;
681
682 class RunHbck implements Callable<HBaseFsck>{
683
684 @Override
685 public HBaseFsck call() throws Exception {
686
687 Configuration c = new Configuration(conf);
688 c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
689 c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
690 c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
691 c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
692 return doFsck(c, false);
693 }
694 }
695
696 service = Executors.newFixedThreadPool(2);
697 hbck1 = service.submit(new RunHbck());
698 hbck2 = service.submit(new RunHbck());
699 service.shutdown();
700
701 service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
702 HBaseFsck h1 = hbck1.get();
703 HBaseFsck h2 = hbck2.get();
704
705 assertNotNull(h1);
706 assertNotNull(h2);
707 assert(h1.getRetCode() >= 0);
708 assert(h2.getRetCode() >= 0);
709
710 }
711
712
713
714
715
716 @Test (timeout=180000)
717 public void testDupeStartKey() throws Exception {
718 TableName table =
719 TableName.valueOf("tableDupeStartKey");
720 try {
721 setupTable(table);
722 assertNoErrors(doFsck(conf, false));
723 assertEquals(ROWKEYS.length, countRows());
724
725
726 HRegionInfo hriDupe =
727 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
728 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
729 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
730 .waitForAssignment(hriDupe);
731 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
732 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
733
734 HBaseFsck hbck = doFsck(conf, false);
735 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
736 ERROR_CODE.DUPE_STARTKEYS});
737 assertEquals(2, hbck.getOverlapGroups(table).size());
738 assertEquals(ROWKEYS.length, countRows());
739
740
741 doFsck(conf,true);
742
743
744 HBaseFsck hbck2 = doFsck(conf,false);
745 assertNoErrors(hbck2);
746 assertEquals(0, hbck2.getOverlapGroups(table).size());
747 assertEquals(ROWKEYS.length, countRows());
748 } finally {
749 cleanupTable(table);
750 }
751 }
752
753
754
755
756
757 @Test (timeout=180000)
758 public void testHbckWithRegionReplica() throws Exception {
759 TableName table =
760 TableName.valueOf("testHbckWithRegionReplica");
761 try {
762 setupTableWithRegionReplica(table, 2);
763 TEST_UTIL.getHBaseAdmin().flush(table.getName());
764 assertNoErrors(doFsck(conf, false));
765 } finally {
766 cleanupTable(table);
767 }
768 }
769
770 @Test
771 public void testHbckWithFewerReplica() throws Exception {
772 TableName table =
773 TableName.valueOf("testHbckWithFewerReplica");
774 try {
775 setupTableWithRegionReplica(table, 2);
776 TEST_UTIL.getHBaseAdmin().flush(table.getName());
777 assertNoErrors(doFsck(conf, false));
778 assertEquals(ROWKEYS.length, countRows());
779 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
780 Bytes.toBytes("C"), true, false, false, false, 1);
781
782 HBaseFsck hbck = doFsck(conf, false);
783 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_DEPLOYED});
784
785 hbck = doFsck(conf, true);
786
787 hbck = doFsck(conf, false);
788 assertErrors(hbck, new ERROR_CODE[]{});
789 } finally {
790 cleanupTable(table);
791 }
792 }
793
794 @Test
795 public void testHbckWithExcessReplica() throws Exception {
796 TableName table =
797 TableName.valueOf("testHbckWithExcessReplica");
798 try {
799 setupTableWithRegionReplica(table, 2);
800 TEST_UTIL.getHBaseAdmin().flush(table.getName());
801 assertNoErrors(doFsck(conf, false));
802 assertEquals(ROWKEYS.length, countRows());
803
804
805
806
807 HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
808 List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getTableRegions(table);
809 byte[] startKey = Bytes.toBytes("B");
810 byte[] endKey = Bytes.toBytes("C");
811 byte[] metaKey = null;
812 HRegionInfo newHri = null;
813 for (HRegionInfo h : regions) {
814 if (Bytes.compareTo(h.getStartKey(), startKey) == 0 &&
815 Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
816 h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
817 metaKey = h.getRegionName();
818
819 newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
820 break;
821 }
822 }
823 Put put = new Put(metaKey);
824 ServerName sn = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers()
825 .toArray(new ServerName[0])[0];
826
827 MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
828 meta.put(put);
829 meta.flushCommits();
830
831 HBaseFsckRepair.fixUnassigned((HBaseAdmin)TEST_UTIL.getHBaseAdmin(), newHri);
832 HBaseFsckRepair.waitUntilAssigned((HBaseAdmin)TEST_UTIL.getHBaseAdmin(), newHri);
833
834 Delete delete = new Delete(metaKey);
835 delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
836 delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
837 delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
838 meta.delete(delete);
839 meta.flushCommits();
840 meta.close();
841
842 HBaseFsck hbck = doFsck(conf, false);
843 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_IN_META});
844
845 hbck = doFsck(conf, true);
846
847 hbck = doFsck(conf, false);
848 assertErrors(hbck, new ERROR_CODE[]{});
849 } finally {
850 cleanupTable(table);
851 }
852 }
853
854
855
856 Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
857 ClusterStatus status = admin.getClusterStatus();
858 Collection<ServerName> regionServers = status.getServers();
859 Map<ServerName, List<String>> mm =
860 new HashMap<ServerName, List<String>>();
861 for (ServerName hsi : regionServers) {
862 AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
863
864
865 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
866 List<String> regionNames = new ArrayList<String>();
867 for (HRegionInfo hri : regions) {
868 regionNames.add(hri.getRegionNameAsString());
869 }
870 mm.put(hsi, regionNames);
871 }
872 return mm;
873 }
874
875
876
877
878 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
879 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
880 if (e.getValue().contains(hri.getRegionNameAsString())) {
881 return e.getKey();
882 }
883 }
884 return null;
885 }
886
887
888
889
890
891 @Test (timeout=180000)
892 public void testDupeRegion() throws Exception {
893 TableName table =
894 TableName.valueOf("tableDupeRegion");
895 try {
896 setupTable(table);
897 assertNoErrors(doFsck(conf, false));
898 assertEquals(ROWKEYS.length, countRows());
899
900
901 HRegionInfo hriDupe =
902 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
903
904 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
905 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
906 .waitForAssignment(hriDupe);
907 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
908 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
909
910
911
912
913
914 while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
915 Thread.sleep(250);
916 }
917
918 LOG.debug("Finished assignment of dupe region");
919
920
921 HBaseFsck hbck = doFsck(conf, false);
922 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
923 ERROR_CODE.DUPE_STARTKEYS});
924 assertEquals(2, hbck.getOverlapGroups(table).size());
925 assertEquals(ROWKEYS.length, countRows());
926
927
928 doFsck(conf,true);
929
930
931 HBaseFsck hbck2 = doFsck(conf,false);
932 assertNoErrors(hbck2);
933 assertEquals(0, hbck2.getOverlapGroups(table).size());
934 assertEquals(ROWKEYS.length, countRows());
935 } finally {
936 cleanupTable(table);
937 }
938 }
939
940
941
942
943 @Test (timeout=180000)
944 public void testDegenerateRegions() throws Exception {
945 TableName table = TableName.valueOf("tableDegenerateRegions");
946 try {
947 setupTable(table);
948 assertNoErrors(doFsck(conf,false));
949 assertEquals(ROWKEYS.length, countRows());
950
951
952 HRegionInfo hriDupe =
953 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
954 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
955 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
956 .waitForAssignment(hriDupe);
957 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
958 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
959
960 HBaseFsck hbck = doFsck(conf,false);
961 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS,
962 ERROR_CODE.DUPE_STARTKEYS });
963 assertEquals(2, hbck.getOverlapGroups(table).size());
964 assertEquals(ROWKEYS.length, countRows());
965
966
967 doFsck(conf,true);
968
969
970 HBaseFsck hbck2 = doFsck(conf,false);
971 assertNoErrors(hbck2);
972 assertEquals(0, hbck2.getOverlapGroups(table).size());
973 assertEquals(ROWKEYS.length, countRows());
974 } finally {
975 cleanupTable(table);
976 }
977 }
978
979
980
981
982
983 @Test (timeout=180000)
984 public void testContainedRegionOverlap() throws Exception {
985 TableName table =
986 TableName.valueOf("tableContainedRegionOverlap");
987 try {
988 setupTable(table);
989 assertEquals(ROWKEYS.length, countRows());
990
991
992 HRegionInfo hriOverlap =
993 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
994 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
995 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
996 .waitForAssignment(hriOverlap);
997 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
998 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
999
1000 HBaseFsck hbck = doFsck(conf, false);
1001 assertErrors(hbck, new ERROR_CODE[] {
1002 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
1003 assertEquals(2, hbck.getOverlapGroups(table).size());
1004 assertEquals(ROWKEYS.length, countRows());
1005
1006
1007 doFsck(conf, true);
1008
1009
1010 HBaseFsck hbck2 = doFsck(conf,false);
1011 assertNoErrors(hbck2);
1012 assertEquals(0, hbck2.getOverlapGroups(table).size());
1013 assertEquals(ROWKEYS.length, countRows());
1014 } finally {
1015 cleanupTable(table);
1016 }
1017 }
1018
1019
1020
1021
1022
1023
1024
1025 @Test (timeout=180000)
1026 public void testSidelineOverlapRegion() throws Exception {
1027 TableName table =
1028 TableName.valueOf("testSidelineOverlapRegion");
1029 try {
1030 setupTable(table);
1031 assertEquals(ROWKEYS.length, countRows());
1032
1033
1034 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1035 HMaster master = cluster.getMaster();
1036 HRegionInfo hriOverlap1 =
1037 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
1038 master.assignRegion(hriOverlap1);
1039 master.getAssignmentManager().waitForAssignment(hriOverlap1);
1040 HRegionInfo hriOverlap2 =
1041 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
1042 master.assignRegion(hriOverlap2);
1043 master.getAssignmentManager().waitForAssignment(hriOverlap2);
1044
1045 HBaseFsck hbck = doFsck(conf, false);
1046 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
1047 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
1048 assertEquals(3, hbck.getOverlapGroups(table).size());
1049 assertEquals(ROWKEYS.length, countRows());
1050
1051
1052 Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
1053 ServerName serverName = null;
1054 byte[] regionName = null;
1055 for (HbckInfo hbi: overlapGroups.values()) {
1056 if ("A".equals(Bytes.toString(hbi.getStartKey()))
1057 && "B".equals(Bytes.toString(hbi.getEndKey()))) {
1058 regionName = hbi.getRegionName();
1059
1060
1061 int k = cluster.getServerWith(regionName);
1062 for (int i = 0; i < 3; i++) {
1063 if (i != k) {
1064 HRegionServer rs = cluster.getRegionServer(i);
1065 serverName = rs.getServerName();
1066 break;
1067 }
1068 }
1069
1070 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
1071 cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
1072 admin.offline(regionName);
1073 break;
1074 }
1075 }
1076
1077 assertNotNull(regionName);
1078 assertNotNull(serverName);
1079 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
1080 Put put = new Put(regionName);
1081 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
1082 Bytes.toBytes(serverName.getHostAndPort()));
1083 meta.put(put);
1084 }
1085
1086
1087 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1088 fsck.connect();
1089 fsck.setDisplayFullReport();
1090 fsck.setTimeLag(0);
1091 fsck.setFixAssignments(true);
1092 fsck.setFixMeta(true);
1093 fsck.setFixHdfsHoles(true);
1094 fsck.setFixHdfsOverlaps(true);
1095 fsck.setFixHdfsOrphans(true);
1096 fsck.setFixVersionFile(true);
1097 fsck.setSidelineBigOverlaps(true);
1098 fsck.setMaxMerge(2);
1099 fsck.onlineHbck();
1100 fsck.close();
1101
1102
1103
1104 HBaseFsck hbck2 = doFsck(conf,false);
1105 assertNoErrors(hbck2);
1106 assertEquals(0, hbck2.getOverlapGroups(table).size());
1107 assertTrue(ROWKEYS.length > countRows());
1108 } finally {
1109 cleanupTable(table);
1110 }
1111 }
1112
1113
1114
1115
1116
1117 @Test (timeout=180000)
1118 public void testOverlapAndOrphan() throws Exception {
1119 TableName table =
1120 TableName.valueOf("tableOverlapAndOrphan");
1121 try {
1122 setupTable(table);
1123 assertEquals(ROWKEYS.length, countRows());
1124
1125
1126 admin.disableTable(table);
1127 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1128 Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1129 TEST_UTIL.getHBaseAdmin().enableTable(table);
1130
1131 HRegionInfo hriOverlap =
1132 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1133 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1134 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1135 .waitForAssignment(hriOverlap);
1136 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1137 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1138
1139 HBaseFsck hbck = doFsck(conf, false);
1140 assertErrors(hbck, new ERROR_CODE[] {
1141 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1142 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1143
1144
1145 doFsck(conf, true);
1146
1147
1148 HBaseFsck hbck2 = doFsck(conf,false);
1149 assertNoErrors(hbck2);
1150 assertEquals(0, hbck2.getOverlapGroups(table).size());
1151 assertEquals(ROWKEYS.length, countRows());
1152 } finally {
1153 cleanupTable(table);
1154 }
1155 }
1156
1157
1158
1159
1160
1161
1162 @Test (timeout=180000)
1163 public void testCoveredStartKey() throws Exception {
1164 TableName table =
1165 TableName.valueOf("tableCoveredStartKey");
1166 try {
1167 setupTable(table);
1168 assertEquals(ROWKEYS.length, countRows());
1169
1170
1171 HRegionInfo hriOverlap =
1172 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
1173 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1174 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1175 .waitForAssignment(hriOverlap);
1176 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1177 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1178
1179 HBaseFsck hbck = doFsck(conf, false);
1180 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
1181 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
1182 assertEquals(3, hbck.getOverlapGroups(table).size());
1183 assertEquals(ROWKEYS.length, countRows());
1184
1185
1186 doFsck(conf, true);
1187
1188
1189 HBaseFsck hbck2 = doFsck(conf, false);
1190 assertErrors(hbck2, new ERROR_CODE[0]);
1191 assertEquals(0, hbck2.getOverlapGroups(table).size());
1192 assertEquals(ROWKEYS.length, countRows());
1193 } finally {
1194 cleanupTable(table);
1195 }
1196 }
1197
1198
1199
1200
1201
1202 @Test (timeout=180000)
1203 public void testRegionHole() throws Exception {
1204 TableName table =
1205 TableName.valueOf("tableRegionHole");
1206 try {
1207 setupTable(table);
1208 assertEquals(ROWKEYS.length, countRows());
1209
1210
1211 admin.disableTable(table);
1212 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1213 Bytes.toBytes("C"), true, true, true);
1214 admin.enableTable(table);
1215
1216 HBaseFsck hbck = doFsck(conf, false);
1217 assertErrors(hbck, new ERROR_CODE[] {
1218 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1219
1220 assertEquals(0, hbck.getOverlapGroups(table).size());
1221
1222
1223 doFsck(conf, true);
1224
1225
1226 assertNoErrors(doFsck(conf,false));
1227 assertEquals(ROWKEYS.length - 2 , countRows());
1228 } finally {
1229 cleanupTable(table);
1230 }
1231 }
1232
1233
1234
1235
1236
1237 @Test (timeout=180000)
1238 public void testHDFSRegioninfoMissing() throws Exception {
1239 TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
1240 try {
1241 setupTable(table);
1242 assertEquals(ROWKEYS.length, countRows());
1243
1244
1245 admin.disableTable(table);
1246 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1247 Bytes.toBytes("C"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1248 TEST_UTIL.getHBaseAdmin().enableTable(table);
1249
1250 HBaseFsck hbck = doFsck(conf, false);
1251 assertErrors(hbck, new ERROR_CODE[] {
1252 ERROR_CODE.ORPHAN_HDFS_REGION,
1253 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1254 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1255
1256 assertEquals(0, hbck.getOverlapGroups(table).size());
1257
1258
1259 doFsck(conf, true);
1260
1261
1262 assertNoErrors(doFsck(conf, false));
1263 assertEquals(ROWKEYS.length, countRows());
1264 } finally {
1265 cleanupTable(table);
1266 }
1267 }
1268
1269
1270
1271
1272
1273
1274 @Test(timeout = 180000)
1275 public void testHDFSRegioninfoMissingAndCheckRegionBoundary() throws Exception {
1276 TableName table = TableName.valueOf("testHDFSRegioninfoMissingAndCheckRegionBoundary");
1277 try {
1278 setupTable(table);
1279 assertEquals(ROWKEYS.length, countRows());
1280
1281
1282 admin.disableTable(table);
1283 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
1284 true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1285 admin.enableTable(table);
1286
1287 HBaseFsck hbck = doFsck(conf, false);
1288 assertErrors(hbck,
1289 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
1290 HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
1291 HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1292 HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
1293
1294 assertEquals(0, hbck.getOverlapGroups(table).size());
1295
1296
1297 doFsck(conf, true);
1298
1299
1300 assertNoErrors(doFsck(conf, false));
1301
1302
1303 for (int i = 0; i < ROWKEYS.length; i++) {
1304 if (i != ROWKEYS.length - 1) {
1305 assertEquals(1, countRows(ROWKEYS[i], ROWKEYS[i + 1]));
1306 } else {
1307 assertEquals(1, countRows(ROWKEYS[i], null));
1308 }
1309 }
1310
1311 } finally {
1312 cleanupTable(table);
1313 }
1314 }
1315
1316
1317
1318
1319
1320 @Test (timeout=180000)
1321 public void testNotInMetaOrDeployedHole() throws Exception {
1322 TableName table =
1323 TableName.valueOf("tableNotInMetaOrDeployedHole");
1324 try {
1325 setupTable(table);
1326 assertEquals(ROWKEYS.length, countRows());
1327
1328
1329 admin.disableTable(table);
1330 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1331 Bytes.toBytes("C"), true, true, false);
1332 admin.enableTable(table);
1333
1334 HBaseFsck hbck = doFsck(conf, false);
1335 assertErrors(hbck, new ERROR_CODE[] {
1336 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1337
1338 assertEquals(0, hbck.getOverlapGroups(table).size());
1339
1340
1341 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1342 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1343
1344
1345 assertNoErrors(doFsck(conf,false));
1346 assertEquals(ROWKEYS.length, countRows());
1347 } finally {
1348 cleanupTable(table);
1349 }
1350 }
1351
1352
1353
1354
1355 @Test (timeout=180000)
1356 public void testNotInMetaHole() throws Exception {
1357 TableName table =
1358 TableName.valueOf("tableNotInMetaHole");
1359 try {
1360 setupTable(table);
1361 assertEquals(ROWKEYS.length, countRows());
1362
1363
1364 admin.disableTable(table);
1365 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1366 Bytes.toBytes("C"), false, true, false);
1367 admin.enableTable(table);
1368
1369 HBaseFsck hbck = doFsck(conf, false);
1370 assertErrors(hbck, new ERROR_CODE[] {
1371 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1372
1373 assertEquals(0, hbck.getOverlapGroups(table).size());
1374
1375
1376 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1377 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1378
1379
1380 assertNoErrors(doFsck(conf,false));
1381 assertEquals(ROWKEYS.length, countRows());
1382 } finally {
1383 cleanupTable(table);
1384 }
1385 }
1386
1387
1388
1389
1390
1391 @Test (timeout=180000)
1392 public void testNotInHdfs() throws Exception {
1393 TableName table =
1394 TableName.valueOf("tableNotInHdfs");
1395 try {
1396 setupTable(table);
1397 assertEquals(ROWKEYS.length, countRows());
1398
1399
1400 admin.flush(table);
1401
1402
1403 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1404 Bytes.toBytes("C"), false, false, true);
1405
1406 HBaseFsck hbck = doFsck(conf, false);
1407 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1408
1409 assertEquals(0, hbck.getOverlapGroups(table).size());
1410
1411
1412 doFsck(conf, true);
1413
1414
1415 assertNoErrors(doFsck(conf,false));
1416 assertEquals(ROWKEYS.length - 2, countRows());
1417 } finally {
1418 cleanupTable(table);
1419 }
1420 }
1421
1422
1423
1424
1425
1426 @Test (timeout=180000)
1427 public void testNotInHdfsWithReplicas() throws Exception {
1428 TableName table =
1429 TableName.valueOf("tableNotInHdfs");
1430 HBaseAdmin admin = new HBaseAdmin(conf);
1431 try {
1432 HRegionInfo[] oldHris = new HRegionInfo[2];
1433 setupTableWithRegionReplica(table, 2);
1434 assertEquals(ROWKEYS.length, countRows());
1435 NavigableMap<HRegionInfo, ServerName> map = MetaScanner.allTableRegions(TEST_UTIL.getConnection(),
1436 tbl.getName());
1437 int i = 0;
1438
1439 for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
1440 if (m.getKey().getStartKey().length > 0 &&
1441 m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
1442 LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
1443 oldHris[i++] = m.getKey();
1444 }
1445 }
1446
1447 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1448
1449
1450 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1451 Bytes.toBytes("C"), false, false, true);
1452
1453 HBaseFsck hbck = doFsck(conf, false);
1454 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1455
1456
1457 doFsck(conf, true);
1458
1459
1460 assertNoErrors(doFsck(conf,false));
1461 assertEquals(ROWKEYS.length - 2, countRows());
1462
1463
1464
1465 i = 0;
1466 HRegionInfo[] newHris = new HRegionInfo[2];
1467
1468 map = MetaScanner.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
1469
1470 for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
1471 if (m.getKey().getStartKey().length > 0 &&
1472 m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
1473 newHris[i++] = m.getKey();
1474 }
1475 }
1476
1477 Collection<ServerName> servers = admin.getClusterStatus().getServers();
1478 Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
1479 for (ServerName s : servers) {
1480 List<HRegionInfo> list = admin.getOnlineRegions(s);
1481 onlineRegions.addAll(list);
1482 }
1483
1484 assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
1485
1486
1487 assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
1488 } finally {
1489 cleanupTable(table);
1490 admin.close();
1491 }
1492 }
1493
1494
1495
1496
1497
1498
1499 @Test (timeout=180000)
1500 public void testNoHdfsTable() throws Exception {
1501 TableName table = TableName.valueOf("NoHdfsTable");
1502 setupTable(table);
1503 assertEquals(ROWKEYS.length, countRows());
1504
1505
1506 admin.flush(table);
1507
1508
1509 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1510 Bytes.toBytes("A"), false, false, true);
1511 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1512 Bytes.toBytes("B"), false, false, true);
1513 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1514 Bytes.toBytes("C"), false, false, true);
1515 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1516 Bytes.toBytes(""), false, false, true);
1517
1518
1519 deleteTableDir(table);
1520
1521 HBaseFsck hbck = doFsck(conf, false);
1522 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1523 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1524 ERROR_CODE.NOT_IN_HDFS,});
1525
1526 assertEquals(0, hbck.getOverlapGroups(table).size());
1527
1528
1529 doFsck(conf, true);
1530
1531
1532 assertNoErrors(doFsck(conf,false));
1533 assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
1534 }
1535
1536 public void deleteTableDir(TableName table) throws IOException {
1537 Path rootDir = FSUtils.getRootDir(conf);
1538 FileSystem fs = rootDir.getFileSystem(conf);
1539 Path p = FSUtils.getTableDir(rootDir, table);
1540 HBaseFsck.debugLsr(conf, p);
1541 boolean success = fs.delete(p, true);
1542 LOG.info("Deleted " + p + " sucessfully? " + success);
1543 }
1544
1545
1546
1547
1548 @Test (timeout=180000)
1549 public void testNoVersionFile() throws Exception {
1550
1551 Path rootDir = FSUtils.getRootDir(conf);
1552 FileSystem fs = rootDir.getFileSystem(conf);
1553 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1554 fs.delete(versionFile, true);
1555
1556
1557 HBaseFsck hbck = doFsck(conf, false);
1558 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1559
1560 doFsck(conf, true);
1561
1562
1563 assertNoErrors(doFsck(conf, false));
1564 }
1565
1566
1567
1568
1569 @Test (timeout=180000)
1570 public void testRegionShouldNotBeDeployed() throws Exception {
1571 TableName table =
1572 TableName.valueOf("tableRegionShouldNotBeDeployed");
1573 try {
1574 LOG.info("Starting testRegionShouldNotBeDeployed.");
1575 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1576 assertTrue(cluster.waitForActiveAndReadyMaster());
1577
1578
1579 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1580 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1581 HTableDescriptor htdDisabled = new HTableDescriptor(table);
1582 htdDisabled.addFamily(new HColumnDescriptor(FAM));
1583
1584
1585 FSTableDescriptors fstd = new FSTableDescriptors(conf);
1586 fstd.createTableDescriptor(htdDisabled);
1587 List<HRegionInfo> disabledRegions =
1588 TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
1589
1590
1591 HRegionServer hrs = cluster.getRegionServer(0);
1592
1593
1594 admin.disableTable(table);
1595 admin.enableTable(table);
1596
1597
1598 admin.disableTable(table);
1599 HRegionInfo region = disabledRegions.remove(0);
1600 byte[] regionName = region.getRegionName();
1601
1602
1603 assertTrue(cluster.getServerWith(regionName) == -1);
1604
1605
1606
1607
1608
1609 HRegion r = HRegion.openHRegion(
1610 region, htdDisabled, hrs.getWAL(region), conf);
1611 hrs.addToOnlineRegions(r);
1612
1613 HBaseFsck hbck = doFsck(conf, false);
1614 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1615
1616
1617 doFsck(conf, true);
1618
1619
1620 assertNoErrors(doFsck(conf, false));
1621 } finally {
1622 admin.enableTable(table);
1623 cleanupTable(table);
1624 }
1625 }
1626
1627
1628
1629
1630 @Test (timeout=180000)
1631 public void testFixByTable() throws Exception {
1632 TableName table1 =
1633 TableName.valueOf("testFixByTable1");
1634 TableName table2 =
1635 TableName.valueOf("testFixByTable2");
1636 try {
1637 setupTable(table1);
1638
1639 admin.flush(table1);
1640
1641 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1642 Bytes.toBytes("C"), false, false, true);
1643
1644 setupTable(table2);
1645
1646 admin.flush(table2);
1647
1648 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1649 Bytes.toBytes("C"), false, false, true);
1650
1651 HBaseFsck hbck = doFsck(conf, false);
1652 assertErrors(hbck, new ERROR_CODE[] {
1653 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1654
1655
1656 doFsck(conf, true, table1);
1657
1658 assertNoErrors(doFsck(conf, false, table1));
1659
1660 assertErrors(doFsck(conf, false, table2),
1661 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1662
1663
1664 doFsck(conf, true, table2);
1665
1666 assertNoErrors(doFsck(conf, false));
1667 assertEquals(ROWKEYS.length - 2, countRows());
1668 } finally {
1669 cleanupTable(table1);
1670 cleanupTable(table2);
1671 }
1672 }
1673
1674
1675
1676 @Test (timeout=180000)
1677 public void testLingeringSplitParent() throws Exception {
1678 TableName table =
1679 TableName.valueOf("testLingeringSplitParent");
1680 Table meta = null;
1681 try {
1682 setupTable(table);
1683 assertEquals(ROWKEYS.length, countRows());
1684
1685
1686 admin.flush(table);
1687 HRegionLocation location = tbl.getRegionLocation("B");
1688
1689
1690 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1691 Bytes.toBytes("C"), true, true, false);
1692
1693
1694 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1695 HRegionInfo hri = location.getRegionInfo();
1696
1697 HRegionInfo a = new HRegionInfo(tbl.getName(),
1698 Bytes.toBytes("B"), Bytes.toBytes("BM"));
1699 HRegionInfo b = new HRegionInfo(tbl.getName(),
1700 Bytes.toBytes("BM"), Bytes.toBytes("C"));
1701
1702 hri.setOffline(true);
1703 hri.setSplit(true);
1704
1705 MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
1706 meta.close();
1707 admin.flush(TableName.META_TABLE_NAME);
1708
1709 HBaseFsck hbck = doFsck(conf, false);
1710 assertErrors(hbck, new ERROR_CODE[] {
1711 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1712
1713
1714 hbck = doFsck(conf, true);
1715 assertErrors(hbck, new ERROR_CODE[] {
1716 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1717 assertFalse(hbck.shouldRerun());
1718 hbck = doFsck(conf, false);
1719 assertErrors(hbck, new ERROR_CODE[] {
1720 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1721
1722
1723 hbck = new HBaseFsck(conf, hbfsckExecutorService);
1724 hbck.connect();
1725 hbck.setDisplayFullReport();
1726 hbck.setTimeLag(0);
1727 hbck.setFixSplitParents(true);
1728 hbck.onlineHbck();
1729 assertTrue(hbck.shouldRerun());
1730 hbck.close();
1731
1732 Get get = new Get(hri.getRegionName());
1733 Result result = meta.get(get);
1734 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1735 HConstants.SPLITA_QUALIFIER).isEmpty());
1736 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1737 HConstants.SPLITB_QUALIFIER).isEmpty());
1738 admin.flush(TableName.META_TABLE_NAME);
1739
1740
1741 doFsck(conf, true);
1742
1743
1744 assertNoErrors(doFsck(conf, false));
1745 assertEquals(ROWKEYS.length, countRows());
1746 } finally {
1747 cleanupTable(table);
1748 IOUtils.closeQuietly(meta);
1749 }
1750 }
1751
1752
1753
1754
1755
1756 @Test (timeout=180000)
1757 public void testValidLingeringSplitParent() throws Exception {
1758 TableName table =
1759 TableName.valueOf("testLingeringSplitParent");
1760 Table meta = null;
1761 try {
1762 setupTable(table);
1763 assertEquals(ROWKEYS.length, countRows());
1764
1765
1766 admin.flush(table);
1767 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1768
1769 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1770 HRegionInfo hri = location.getRegionInfo();
1771
1772
1773 byte[] regionName = location.getRegionInfo().getRegionName();
1774 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1775 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1776
1777
1778
1779
1780 HBaseFsck hbck = doFsck(
1781 conf, true, true, false, false, false, true, true, true, true, false, false, false, null);
1782 assertErrors(hbck, new ERROR_CODE[] {});
1783
1784
1785 Get get = new Get(hri.getRegionName());
1786 Result result = meta.get(get);
1787 assertNotNull(result);
1788 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1789
1790 assertEquals(ROWKEYS.length, countRows());
1791
1792
1793 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1794 assertNoErrors(doFsck(conf, false));
1795 } finally {
1796 cleanupTable(table);
1797 IOUtils.closeQuietly(meta);
1798 }
1799 }
1800
1801
1802
1803
1804
1805 @Test(timeout=75000)
1806 public void testSplitDaughtersNotInMeta() throws Exception {
1807 TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
1808 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1809 try {
1810 setupTable(table);
1811 assertEquals(ROWKEYS.length, countRows());
1812
1813
1814 admin.flush(table);
1815 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1816
1817 HRegionInfo hri = location.getRegionInfo();
1818
1819
1820 byte[] regionName = location.getRegionInfo().getRegionName();
1821 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1822 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1823
1824 PairOfSameType<HRegionInfo> daughters =
1825 MetaTableAccessor.getDaughterRegions(meta.get(new Get(regionName)));
1826
1827
1828 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1829 undeployRegion(connection, hris.get(daughters.getFirst()), daughters.getFirst());
1830 undeployRegion(connection, hris.get(daughters.getSecond()), daughters.getSecond());
1831
1832 List<Delete> deletes = new ArrayList<>();
1833 deletes.add(new Delete(daughters.getFirst().getRegionName()));
1834 deletes.add(new Delete(daughters.getSecond().getRegionName()));
1835 meta.delete(deletes);
1836
1837
1838 RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
1839 getAssignmentManager().getRegionStates();
1840 regionStates.deleteRegion(daughters.getFirst());
1841 regionStates.deleteRegion(daughters.getSecond());
1842
1843 HBaseFsck hbck = doFsck(conf, false);
1844 assertErrors(hbck,
1845 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1846 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1847
1848
1849 hbck = doFsck(
1850 conf, true, true, false, false, false, false, false, false, false, false, false, false, null);
1851 assertErrors(hbck,
1852 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1853 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1854
1855
1856 Get get = new Get(hri.getRegionName());
1857 Result result = meta.get(get);
1858 assertNotNull(result);
1859 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1860
1861 assertEquals(ROWKEYS.length, countRows());
1862
1863
1864 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1865 assertNoErrors(doFsck(conf, false));
1866 } finally {
1867 meta.close();
1868 cleanupTable(table);
1869 }
1870 }
1871
1872
1873
1874
1875
1876 @Test(timeout=120000)
1877 public void testMissingFirstRegion() throws Exception {
1878 TableName table = TableName.valueOf("testMissingFirstRegion");
1879 try {
1880 setupTable(table);
1881 assertEquals(ROWKEYS.length, countRows());
1882
1883
1884 admin.disableTable(table);
1885 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1886 true, true);
1887 admin.enableTable(table);
1888
1889 HBaseFsck hbck = doFsck(conf, false);
1890 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1891
1892 doFsck(conf, true);
1893
1894 assertNoErrors(doFsck(conf, false));
1895 } finally {
1896 cleanupTable(table);
1897 }
1898 }
1899
1900
1901
1902
1903
1904 @Test(timeout=120000)
1905 public void testRegionDeployedNotInHdfs() throws Exception {
1906 TableName table =
1907 TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1908 try {
1909 setupTable(table);
1910 admin.flush(table);
1911
1912
1913 deleteRegion(conf, tbl.getTableDescriptor(),
1914 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1915 false, true);
1916
1917 HBaseFsck hbck = doFsck(conf, false);
1918 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1919
1920 doFsck(conf, true);
1921
1922 assertNoErrors(doFsck(conf, false));
1923 } finally {
1924 cleanupTable(table);
1925 }
1926 }
1927
1928
1929
1930
1931
1932 @Test(timeout=120000)
1933 public void testMissingLastRegion() throws Exception {
1934 TableName table =
1935 TableName.valueOf("testMissingLastRegion");
1936 try {
1937 setupTable(table);
1938 assertEquals(ROWKEYS.length, countRows());
1939
1940
1941 admin.disableTable(table);
1942 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1943 true, true);
1944 admin.enableTable(table);
1945
1946 HBaseFsck hbck = doFsck(conf, false);
1947 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1948
1949 doFsck(conf, true);
1950
1951 assertNoErrors(doFsck(conf, false));
1952 } finally {
1953 cleanupTable(table);
1954 }
1955 }
1956
1957
1958
1959
1960 @Test (timeout=180000)
1961 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1962 TableName table =
1963 TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1964 try {
1965 setupTable(table);
1966 assertEquals(ROWKEYS.length, countRows());
1967
1968
1969 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1970 Bytes.toBytes("B"), true, false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
1971
1972
1973 HBaseFsck hbck = doFsck(conf, false);
1974 assertErrors(hbck, new ERROR_CODE[] {
1975 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1976
1977
1978 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1979 fsck.connect();
1980 fsck.setDisplayFullReport();
1981 fsck.setTimeLag(0);
1982 fsck.setCheckHdfs(false);
1983 fsck.onlineHbck();
1984 assertErrors(fsck, new ERROR_CODE[] {
1985 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1986 fsck.close();
1987
1988
1989 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1990 fsck.connect();
1991 fsck.setDisplayFullReport();
1992 fsck.setTimeLag(0);
1993 fsck.setCheckHdfs(false);
1994 fsck.setFixAssignments(true);
1995 fsck.onlineHbck();
1996 assertTrue(fsck.shouldRerun());
1997 fsck.onlineHbck();
1998 assertNoErrors(fsck);
1999
2000 assertEquals(ROWKEYS.length, countRows());
2001
2002 fsck.close();
2003 } finally {
2004 cleanupTable(table);
2005 }
2006 }
2007
2008
2009
2010
2011
2012
2013 @Test (timeout=180000)
2014 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
2015 TableName table =
2016 TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
2017 try {
2018 setupTable(table);
2019 assertEquals(ROWKEYS.length, countRows());
2020
2021
2022 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
2023 Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
2024
2025
2026 HBaseFsck hbck = doFsck(conf, false);
2027 assertErrors(hbck,
2028 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
2029
2030
2031 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
2032 fsck.connect();
2033 fsck.setDisplayFullReport();
2034 fsck.setTimeLag(0);
2035 fsck.setCheckHdfs(false);
2036 fsck.onlineHbck();
2037 assertErrors(fsck,
2038 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
2039 fsck.close();
2040
2041
2042 fsck = new HBaseFsck(conf, hbfsckExecutorService);
2043 fsck.connect();
2044 fsck.setDisplayFullReport();
2045 fsck.setTimeLag(0);
2046 fsck.setCheckHdfs(false);
2047 fsck.setFixAssignments(true);
2048 fsck.setFixMeta(true);
2049 fsck.onlineHbck();
2050 assertFalse(fsck.shouldRerun());
2051 assertErrors(fsck,
2052 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
2053 fsck.close();
2054
2055
2056 fsck = doFsck(conf, true);
2057 assertTrue(fsck.shouldRerun());
2058 fsck = doFsck(conf, true);
2059 assertNoErrors(fsck);
2060 } finally {
2061 cleanupTable(table);
2062 }
2063 }
2064
2065
2066
2067
2068
2069 @Test (timeout=180000)
2070 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
2071 TableName table =
2072 TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
2073 try {
2074 setupTable(table);
2075 assertEquals(ROWKEYS.length, countRows());
2076
2077
2078 admin.disableTable(table);
2079 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
2080 Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
2081 TEST_UTIL.getHBaseAdmin().enableTable(table);
2082
2083 HRegionInfo hriOverlap =
2084 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
2085 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
2086 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
2087 .waitForAssignment(hriOverlap);
2088 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
2089 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
2090
2091 HBaseFsck hbck = doFsck(conf, false);
2092 assertErrors(hbck, new ERROR_CODE[] {
2093 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2094 ERROR_CODE.HOLE_IN_REGION_CHAIN});
2095
2096
2097 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
2098 fsck.connect();
2099 fsck.setDisplayFullReport();
2100 fsck.setTimeLag(0);
2101 fsck.setCheckHdfs(false);
2102 fsck.onlineHbck();
2103 assertErrors(fsck, new ERROR_CODE[] {
2104 ERROR_CODE.HOLE_IN_REGION_CHAIN});
2105 fsck.close();
2106
2107
2108 fsck = new HBaseFsck(conf, hbfsckExecutorService);
2109 fsck.connect();
2110 fsck.setDisplayFullReport();
2111 fsck.setTimeLag(0);
2112 fsck.setCheckHdfs(false);
2113 fsck.setFixHdfsHoles(true);
2114 fsck.setFixHdfsOverlaps(true);
2115 fsck.setFixHdfsOrphans(true);
2116 fsck.onlineHbck();
2117 assertFalse(fsck.shouldRerun());
2118 assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN});
2119 fsck.close();
2120 } finally {
2121 if (admin.isTableDisabled(table)) {
2122 admin.enableTable(table);
2123 }
2124 cleanupTable(table);
2125 }
2126 }
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
2137 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2138 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2139 Path famDir = new Path(regionDir, FAM_STR);
2140
2141
2142 while (true) {
2143 FileStatus[] hfFss = fs.listStatus(famDir);
2144 if (hfFss.length == 0) {
2145 continue;
2146 }
2147 for (FileStatus hfs : hfFss) {
2148 if (!hfs.isDirectory()) {
2149 return hfs.getPath();
2150 }
2151 }
2152 }
2153 }
2154
2155
2156
2157
2158
2159
2160
2161
2162 Path getFlushedMobFile(FileSystem fs, TableName table) throws IOException {
2163 Path regionDir = MobUtils.getMobRegionPath(conf, table);
2164 Path famDir = new Path(regionDir, FAM_STR);
2165
2166
2167 while (true) {
2168 FileStatus[] hfFss = fs.listStatus(famDir);
2169 if (hfFss.length == 0) {
2170 continue;
2171 }
2172 for (FileStatus hfs : hfFss) {
2173 if (!hfs.isDirectory()) {
2174 return hfs.getPath();
2175 }
2176 }
2177 }
2178 }
2179
2180
2181
2182
2183
2184
2185 String createMobFileName(String oldFileName) {
2186 MobFileName mobFileName = MobFileName.create(oldFileName);
2187 String startKey = mobFileName.getStartKey();
2188 String date = mobFileName.getDate();
2189 return MobFileName.create(startKey, date, UUID.randomUUID().toString().replaceAll("-", ""))
2190 .getFileName();
2191 }
2192
2193
2194
2195
2196 @Test(timeout=180000)
2197 public void testQuarantineCorruptHFile() throws Exception {
2198 TableName table = TableName.valueOf(name.getMethodName());
2199 try {
2200 setupTable(table);
2201 assertEquals(ROWKEYS.length, countRows());
2202 admin.flush(table);
2203
2204 FileSystem fs = FileSystem.get(conf);
2205 Path hfile = getFlushedHFile(fs, table);
2206
2207
2208 admin.disableTable(table);
2209
2210
2211 Path corrupt = new Path(hfile.getParent(), "deadbeef");
2212 TestHFile.truncateFile(fs, hfile, corrupt);
2213 LOG.info("Created corrupted file " + corrupt);
2214 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
2215
2216
2217 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
2218 assertEquals(res.getRetCode(), 0);
2219 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2220 assertEquals(hfcc.getHFilesChecked(), 5);
2221 assertEquals(hfcc.getCorrupted().size(), 1);
2222 assertEquals(hfcc.getFailures().size(), 0);
2223 assertEquals(hfcc.getQuarantined().size(), 1);
2224 assertEquals(hfcc.getMissing().size(), 0);
2225
2226
2227 admin.enableTable(table);
2228 } finally {
2229 cleanupTable(table);
2230 }
2231 }
2232
2233
2234
2235
2236 @Test(timeout=180000)
2237 public void testQuarantineCorruptMobFile() throws Exception {
2238 TableName table = TableName.valueOf(name.getMethodName());
2239 try {
2240 setupMobTable(table);
2241 assertEquals(ROWKEYS.length, countRows());
2242 admin.flush(table);
2243
2244 FileSystem fs = FileSystem.get(conf);
2245 Path mobFile = getFlushedMobFile(fs, table);
2246 admin.disableTable(table);
2247
2248 String corruptMobFile = createMobFileName(mobFile.getName());
2249 Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
2250 TestHFile.truncateFile(fs, mobFile, corrupt);
2251 LOG.info("Created corrupted mob file " + corrupt);
2252 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
2253 HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
2254
2255
2256 admin.enableTable(table);
2257 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
2258 assertEquals(res.getRetCode(), 0);
2259 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2260 assertEquals(hfcc.getHFilesChecked(), 4);
2261 assertEquals(hfcc.getCorrupted().size(), 0);
2262 assertEquals(hfcc.getFailures().size(), 0);
2263 assertEquals(hfcc.getQuarantined().size(), 0);
2264 assertEquals(hfcc.getMissing().size(), 0);
2265 assertEquals(hfcc.getMobFilesChecked(), 5);
2266 assertEquals(hfcc.getCorruptedMobFiles().size(), 1);
2267 assertEquals(hfcc.getFailureMobFiles().size(), 0);
2268 assertEquals(hfcc.getQuarantinedMobFiles().size(), 1);
2269 assertEquals(hfcc.getMissedMobFiles().size(), 0);
2270 String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
2271 assertEquals(corruptMobFile, quarantinedMobFile);
2272 } finally {
2273 cleanupTable(table);
2274 }
2275 }
2276
2277
2278
2279
2280 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
2281 int corrupt, int fail, int quar, int missing) throws Exception {
2282 try {
2283 setupTable(table);
2284 assertEquals(ROWKEYS.length, countRows());
2285 admin.flush(table);
2286
2287
2288 admin.disableTable(table);
2289
2290 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
2291 table.getNameAsString()};
2292 HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
2293
2294 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2295 assertEquals(hfcc.getHFilesChecked(), check);
2296 assertEquals(hfcc.getCorrupted().size(), corrupt);
2297 assertEquals(hfcc.getFailures().size(), fail);
2298 assertEquals(hfcc.getQuarantined().size(), quar);
2299 assertEquals(hfcc.getMissing().size(), missing);
2300
2301
2302 admin.enableTableAsync(table);
2303 while (!admin.isTableEnabled(table)) {
2304 try {
2305 Thread.sleep(250);
2306 } catch (InterruptedException e) {
2307 e.printStackTrace();
2308 fail("Interrupted when trying to enable table " + table);
2309 }
2310 }
2311 } finally {
2312 cleanupTable(table);
2313 }
2314 }
2315
2316
2317
2318
2319
2320 @Test(timeout=180000)
2321 public void testQuarantineMissingHFile() throws Exception {
2322 TableName table = TableName.valueOf(name.getMethodName());
2323
2324
2325 final FileSystem fs = FileSystem.get(conf);
2326 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
2327 @Override
2328 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
2329 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2330 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2331 @Override
2332 protected void checkHFile(Path p) throws IOException {
2333 if (attemptedFirstHFile.compareAndSet(false, true)) {
2334 assertTrue(fs.delete(p, true));
2335 }
2336 super.checkHFile(p);
2337 }
2338 };
2339 }
2340 };
2341 doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
2342 hbck.close();
2343 }
2344
2345
2346
2347
2348
2349
2350
2351 @Ignore @Test(timeout=180000)
2352 public void testQuarantineMissingFamdir() throws Exception {
2353 TableName table = TableName.valueOf(name.getMethodName());
2354
2355 final FileSystem fs = FileSystem.get(conf);
2356 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
2357 @Override
2358 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
2359 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2360 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2361 @Override
2362 protected void checkColFamDir(Path p) throws IOException {
2363 if (attemptedFirstHFile.compareAndSet(false, true)) {
2364 assertTrue(fs.delete(p, true));
2365 }
2366 super.checkColFamDir(p);
2367 }
2368 };
2369 }
2370 };
2371 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
2372 hbck.close();
2373 }
2374
2375
2376
2377
2378
2379 @Test(timeout=180000)
2380 public void testQuarantineMissingRegionDir() throws Exception {
2381 TableName table = TableName.valueOf(name.getMethodName());
2382
2383 final FileSystem fs = FileSystem.get(conf);
2384 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
2385 @Override
2386 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
2387 throws IOException {
2388 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2389 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2390 @Override
2391 protected void checkRegionDir(Path p) throws IOException {
2392 if (attemptedFirstHFile.compareAndSet(false, true)) {
2393 assertTrue(fs.delete(p, true));
2394 }
2395 super.checkRegionDir(p);
2396 }
2397 };
2398 }
2399 };
2400 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
2401 hbck.close();
2402 }
2403
2404
2405
2406
2407 @Test (timeout=180000)
2408 public void testLingeringReferenceFile() throws Exception {
2409 TableName table =
2410 TableName.valueOf("testLingeringReferenceFile");
2411 try {
2412 setupTable(table);
2413 assertEquals(ROWKEYS.length, countRows());
2414
2415
2416 FileSystem fs = FileSystem.get(conf);
2417 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2418 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2419 Path famDir = new Path(regionDir, FAM_STR);
2420 Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
2421 fs.create(fakeReferenceFile);
2422
2423 HBaseFsck hbck = doFsck(conf, false);
2424 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
2425
2426 doFsck(conf, true);
2427
2428 assertNoErrors(doFsck(conf, false));
2429 } finally {
2430 cleanupTable(table);
2431 }
2432 }
2433
2434
2435
2436
2437 @Test(timeout = 180000)
2438 public void testLingeringHFileLinks() throws Exception {
2439 TableName table = TableName.valueOf("testLingeringHFileLinks");
2440 try {
2441 setupTable(table);
2442
2443 FileSystem fs = FileSystem.get(conf);
2444 Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2445 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2446 String regionName = regionDir.getName();
2447 Path famDir = new Path(regionDir, FAM_STR);
2448 String HFILE_NAME = "01234567abcd";
2449 Path hFilePath = new Path(famDir, HFILE_NAME);
2450
2451
2452 HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
2453 HFile.Writer w =
2454 HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context)
2455 .create();
2456 w.close();
2457
2458 HFileLink.create(conf, fs, famDir, table, regionName, HFILE_NAME);
2459
2460
2461 HBaseFsck hbck = doFsck(conf, false);
2462 assertNoErrors(hbck);
2463
2464
2465 fs.delete(hFilePath, true);
2466
2467
2468 hbck = doFsck(conf, false);
2469 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
2470 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
2471
2472
2473 hbck = doFsck(conf, true);
2474 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
2475 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
2476
2477
2478 hbck = doFsck(conf, false);
2479 assertNoErrors(hbck);
2480 } finally {
2481 cleanupTable(table);
2482 }
2483 }
2484
2485 @Test(timeout = 180000)
2486 public void testCorruptLinkDirectory() throws Exception {
2487 TableName table = TableName.valueOf("testLingeringHFileLinks");
2488 try {
2489 setupTable(table);
2490 FileSystem fs = FileSystem.get(conf);
2491
2492 Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2493 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2494 Path famDir = new Path(regionDir, FAM_STR);
2495 String regionName = regionDir.getName();
2496 String HFILE_NAME = "01234567abcd";
2497 String link = HFileLink.createHFileLinkName(table, regionName, HFILE_NAME);
2498
2499
2500 HBaseFsck hbck = doFsck(conf, false);
2501 assertNoErrors(hbck);
2502
2503
2504 fs.mkdirs(new Path(famDir, link));
2505 fs.create(new Path(new Path(famDir, link), "somefile"));
2506
2507
2508 hbck = doFsck(conf, false);
2509 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
2510 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
2511
2512
2513 hbck = doFsck(conf, true);
2514 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
2515 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
2516
2517
2518 hbck = doFsck(conf, false);
2519 assertNoErrors(hbck);
2520 } finally {
2521 cleanupTable(table);
2522 }
2523 }
2524
2525
2526
2527
2528 @Test (timeout=180000)
2529 public void testMissingRegionInfoQualifier() throws Exception {
2530 Connection connection = ConnectionFactory.createConnection(conf);
2531 TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
2532 try {
2533 setupTable(table);
2534
2535
2536 final List<Delete> deletes = new LinkedList<Delete>();
2537 Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
2538 MetaScanner.metaScan(connection, new MetaScanner.MetaScannerVisitor() {
2539
2540 @Override
2541 public boolean processRow(Result rowResult) throws IOException {
2542 HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
2543 if (hri != null && !hri.getTable().isSystemTable()) {
2544 Delete delete = new Delete(rowResult.getRow());
2545 delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2546 deletes.add(delete);
2547 }
2548 return true;
2549 }
2550
2551 @Override
2552 public void close() throws IOException {
2553 }
2554 });
2555 meta.delete(deletes);
2556
2557
2558 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2559 HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2560 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2561 HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2562 meta.close();
2563
2564 HBaseFsck hbck = doFsck(conf, false);
2565 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2566
2567
2568 hbck = doFsck(conf, true);
2569
2570
2571 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2572 } finally {
2573 cleanupTable(table);
2574 }
2575 connection.close();
2576 }
2577
2578
2579
2580
2581
2582 @Test (timeout=180000)
2583 public void testErrorReporter() throws Exception {
2584 try {
2585 MockErrorReporter.calledCount = 0;
2586 doFsck(conf, false);
2587 assertEquals(MockErrorReporter.calledCount, 0);
2588
2589 conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2590 doFsck(conf, false);
2591 assertTrue(MockErrorReporter.calledCount > 20);
2592 } finally {
2593 conf.set("hbasefsck.errorreporter",
2594 PrintingErrorReporter.class.getName());
2595 MockErrorReporter.calledCount = 0;
2596 }
2597 }
2598
2599 static class MockErrorReporter implements ErrorReporter {
2600 static int calledCount = 0;
2601
2602 @Override
2603 public void clear() {
2604 calledCount++;
2605 }
2606
2607 @Override
2608 public void report(String message) {
2609 calledCount++;
2610 }
2611
2612 @Override
2613 public void reportError(String message) {
2614 calledCount++;
2615 }
2616
2617 @Override
2618 public void reportError(ERROR_CODE errorCode, String message) {
2619 calledCount++;
2620 }
2621
2622 @Override
2623 public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2624 calledCount++;
2625 }
2626
2627 @Override
2628 public void reportError(ERROR_CODE errorCode,
2629 String message, TableInfo table, HbckInfo info) {
2630 calledCount++;
2631 }
2632
2633 @Override
2634 public void reportError(ERROR_CODE errorCode, String message,
2635 TableInfo table, HbckInfo info1, HbckInfo info2) {
2636 calledCount++;
2637 }
2638
2639 @Override
2640 public int summarize() {
2641 return ++calledCount;
2642 }
2643
2644 @Override
2645 public void detail(String details) {
2646 calledCount++;
2647 }
2648
2649 @Override
2650 public ArrayList<ERROR_CODE> getErrorList() {
2651 calledCount++;
2652 return new ArrayList<ERROR_CODE>();
2653 }
2654
2655 @Override
2656 public void progress() {
2657 calledCount++;
2658 }
2659
2660 @Override
2661 public void print(String message) {
2662 calledCount++;
2663 }
2664
2665 @Override
2666 public void resetErrors() {
2667 calledCount++;
2668 }
2669
2670 @Override
2671 public boolean tableHasErrors(TableInfo table) {
2672 calledCount++;
2673 return false;
2674 }
2675 }
2676
2677 @Test(timeout=180000)
2678 public void testCheckTableLocks() throws Exception {
2679 IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2680 EnvironmentEdgeManager.injectEdge(edge);
2681
2682 HBaseFsck hbck = doFsck(conf, false);
2683 assertNoErrors(hbck);
2684
2685 ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2686 final TableName tableName = TableName.valueOf("foo");
2687
2688
2689 final TableLockManager tableLockManager =
2690 TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2691 TableLock writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
2692 writeLock.acquire();
2693 hbck = doFsck(conf, false);
2694 assertNoErrors(hbck);
2695
2696 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2697 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2698
2699 hbck = doFsck(conf, false);
2700 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2701
2702 final CountDownLatch latch = new CountDownLatch(1);
2703 new Thread() {
2704 @Override
2705 public void run() {
2706 TableLock readLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
2707 try {
2708 latch.countDown();
2709 readLock.acquire();
2710 } catch (IOException ex) {
2711 fail();
2712 } catch (IllegalStateException ex) {
2713 return;
2714 }
2715 fail("should not have come here");
2716 };
2717 }.start();
2718
2719 latch.await();
2720 Threads.sleep(300);
2721
2722 hbck = doFsck(conf, false);
2723
2724 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2725
2726 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2727 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2728
2729 hbck = doFsck(conf, false);
2730
2731 assertErrors(
2732 hbck,
2733 new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});
2734
2735 Configuration localConf = new Configuration(conf);
2736
2737
2738 localConf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
2739 Threads.sleep(10);
2740 hbck = doFsck(localConf, true);
2741
2742 hbck = doFsck(localConf, false);
2743 assertNoErrors(hbck);
2744
2745
2746 writeLock = tableLockManager.writeLock(tableName, "should acquire without blocking");
2747 writeLock.acquire();
2748 writeLock.release();
2749 tableLockManager.tableDeleted(tableName);
2750 }
2751
2752
2753
2754
2755 @Test
2756 public void testOrphanedTableZNode() throws Exception {
2757 TableName table = TableName.valueOf("testOrphanedZKTableEntry");
2758
2759 try {
2760 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
2761 .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);
2762
2763 try {
2764 setupTable(table);
2765 Assert.fail(
2766 "Create table should fail when its ZNode has already existed with ENABLING state.");
2767 } catch(TableExistsException t) {
2768
2769 }
2770
2771 try {
2772 cleanupTable(table);
2773 } catch (IOException e) {
2774
2775
2776 }
2777
2778 HBaseFsck hbck = doFsck(conf, false);
2779 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2780
2781
2782 hbck = doFsck(conf, true);
2783
2784
2785 hbck = doFsck(conf, false);
2786 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2787
2788 setupTable(table);
2789 } finally {
2790
2791
2792 try {
2793 cleanupTable(table);
2794 } catch (IOException e) {
2795
2796
2797 }
2798 }
2799 }
2800
2801 @Test (timeout=180000)
2802 public void testMetaOffline() throws Exception {
2803
2804 HBaseFsck hbck = doFsck(conf, false);
2805 assertNoErrors(hbck);
2806 deleteMetaRegion(conf, true, false, false);
2807 hbck = doFsck(conf, false);
2808
2809
2810 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2811 hbck = doFsck(conf, true);
2812 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2813 hbck = doFsck(conf, false);
2814 assertNoErrors(hbck);
2815 }
2816
2817 private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2818 boolean regionInfoOnly) throws IOException, InterruptedException {
2819 HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
2820 .getRegionLocation(HConstants.EMPTY_START_ROW);
2821 ServerName hsa = metaLocation.getServerName();
2822 HRegionInfo hri = metaLocation.getRegionInfo();
2823 if (unassign) {
2824 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2825 try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
2826 undeployRegion(unmanagedConnection, hsa, hri);
2827 }
2828 }
2829
2830 if (regionInfoOnly) {
2831 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2832 Path rootDir = FSUtils.getRootDir(conf);
2833 FileSystem fs = rootDir.getFileSystem(conf);
2834 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2835 hri.getEncodedName());
2836 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2837 fs.delete(hriPath, true);
2838 }
2839
2840 if (hdfs) {
2841 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2842 Path rootDir = FSUtils.getRootDir(conf);
2843 FileSystem fs = rootDir.getFileSystem(conf);
2844 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2845 hri.getEncodedName());
2846 HBaseFsck.debugLsr(conf, p);
2847 boolean success = fs.delete(p, true);
2848 LOG.info("Deleted " + p + " sucessfully? " + success);
2849 HBaseFsck.debugLsr(conf, p);
2850 }
2851 }
2852
2853 @Test (timeout=180000)
2854 public void testTableWithNoRegions() throws Exception {
2855
2856
2857 TableName table =
2858 TableName.valueOf(name.getMethodName());
2859 try {
2860
2861 HTableDescriptor desc = new HTableDescriptor(table);
2862 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2863 desc.addFamily(hcd);
2864 createTable(TEST_UTIL, desc, null);
2865 tbl = (HTable) connection.getTable(table, tableExecutorService);
2866
2867
2868 deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
2869 HConstants.EMPTY_END_ROW, false, false, true);
2870
2871 HBaseFsck hbck = doFsck(conf, false);
2872 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2873
2874 doFsck(conf, true);
2875
2876
2877 doFsck(conf, true);
2878
2879
2880 assertNoErrors(doFsck(conf, false));
2881 } finally {
2882 cleanupTable(table);
2883 }
2884
2885 }
2886
2887 @Test (timeout=180000)
2888 public void testHbckAfterRegionMerge() throws Exception {
2889 TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2890 Table meta = null;
2891 try {
2892
2893 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2894 setupTable(table);
2895 assertEquals(ROWKEYS.length, countRows());
2896
2897
2898 admin.flush(table);
2899 HRegionInfo region1 = tbl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
2900 HRegionInfo region2 = tbl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
2901
2902 int regionCountBeforeMerge = tbl.getRegionLocations().size();
2903
2904 assertNotEquals(region1, region2);
2905
2906
2907 admin.mergeRegions(region1.getEncodedNameAsBytes(),
2908 region2.getEncodedNameAsBytes(), false);
2909
2910
2911 long timeout = System.currentTimeMillis() + 30 * 1000;
2912 while (true) {
2913 if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2914 break;
2915 } else if (System.currentTimeMillis() > timeout) {
2916 fail("Time out waiting on region " + region1.getEncodedName()
2917 + " and " + region2.getEncodedName() + " be merged");
2918 }
2919 Thread.sleep(10);
2920 }
2921
2922 assertEquals(ROWKEYS.length, countRows());
2923
2924 HBaseFsck hbck = doFsck(conf, false);
2925 assertNoErrors(hbck);
2926
2927 } finally {
2928 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2929 cleanupTable(table);
2930 IOUtils.closeQuietly(meta);
2931 }
2932 }
2933
2934 @Test (timeout = 180000)
2935 public void testRegionBoundariesCheck() throws Exception {
2936 HBaseFsck hbck = doFsck(conf, false);
2937 assertNoErrors(hbck);
2938 try {
2939 hbck.checkRegionBoundaries();
2940 } catch (IllegalArgumentException e) {
2941 if (e.getMessage().endsWith("not a valid DFS filename.")) {
2942 fail("Table directory path is not valid." + e.getMessage());
2943 }
2944 }
2945 }
2946
2947 @org.junit.Rule
2948 public TestName name = new TestName();
2949
2950 @Test (timeout=180000)
2951 public void testReadOnlyProperty() throws Exception {
2952 HBaseFsck hbck = doFsck(conf, false);
2953 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2954 hbck.shouldIgnorePreCheckPermission());
2955
2956 hbck = doFsck(conf, true);
2957 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2958 hbck.shouldIgnorePreCheckPermission());
2959
2960 hbck = doFsck(conf, true);
2961 hbck.setIgnorePreCheckPermission(true);
2962 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2963 hbck.shouldIgnorePreCheckPermission());
2964 }
2965
2966 @Test (timeout=180000)
2967 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
2968 TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
2969 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
2970 try {
2971 HTableDescriptor desc = new HTableDescriptor(table);
2972 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
2973 createTable(TEST_UTIL, desc, null);
2974 tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
2975 for (int i = 0; i < 5; i++) {
2976 Put p1 = new Put(("r" + i).getBytes());
2977 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
2978 tbl.put(p1);
2979 }
2980 admin.flush(desc.getTableName());
2981 List<HRegion> regions = cluster.getRegions(desc.getTableName());
2982 int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
2983 HRegionServer regionServer = cluster.getRegionServer(serverWith);
2984 cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
2985 SplitTransactionImpl st = new SplitTransactionImpl(regions.get(0), Bytes.toBytes("r3"));
2986 st.prepare();
2987 st.stepsBeforePONR(regionServer, regionServer, false);
2988 AssignmentManager am = cluster.getMaster().getAssignmentManager();
2989 Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
2990 for (RegionState state : regionsInTransition.values()) {
2991 am.regionOffline(state.getRegion());
2992 }
2993 ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
2994 Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
2995 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
2996 am.assign(regionsMap);
2997 am.waitForAssignment(regions.get(0).getRegionInfo());
2998 HBaseFsck hbck = doFsck(conf, false);
2999 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
3000 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
3001
3002 assertEquals(0, hbck.getOverlapGroups(table).size());
3003
3004
3005 assertErrors(
3006 doFsck(
3007 conf, false, true, false, false, false, false, false, false, false, false, false, false, null),
3008 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
3009 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
3010
3011
3012 assertNoErrors(doFsck(conf, false));
3013 assertEquals(5, countRows());
3014 } finally {
3015 if (tbl != null) {
3016 tbl.close();
3017 tbl = null;
3018 }
3019 cleanupTable(table);
3020 }
3021 }
3022
3023
3024 public static class MasterSyncObserver extends BaseMasterObserver {
3025 volatile CountDownLatch tableCreationLatch = null;
3026 volatile CountDownLatch tableDeletionLatch = null;
3027
3028 @Override
3029 public void postCreateTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
3030 HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
3031
3032 if (tableCreationLatch != null) {
3033 tableCreationLatch.countDown();
3034 }
3035 }
3036
3037 @Override
3038 public void postDeleteTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
3039 TableName tableName)
3040 throws IOException {
3041
3042 if (tableDeletionLatch != null) {
3043 tableDeletionLatch.countDown();
3044 }
3045 }
3046 }
3047
3048 public static void createTable(HBaseTestingUtility testUtil, HTableDescriptor htd,
3049 byte [][] splitKeys) throws Exception {
3050
3051
3052 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
3053 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
3054 observer.tableCreationLatch = new CountDownLatch(1);
3055 if (splitKeys != null) {
3056 admin.createTable(htd, splitKeys);
3057 } else {
3058 admin.createTable(htd);
3059 }
3060 observer.tableCreationLatch.await();
3061 observer.tableCreationLatch = null;
3062 testUtil.waitUntilAllRegionsAssigned(htd.getTableName());
3063 }
3064
3065 public static void deleteTable(HBaseTestingUtility testUtil, TableName tableName)
3066 throws Exception {
3067
3068
3069 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
3070 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
3071 observer.tableDeletionLatch = new CountDownLatch(1);
3072 try {
3073 admin.disableTable(tableName);
3074 } catch (Exception e) {
3075 LOG.debug("Table: " + tableName + " already disabled, so just deleting it.");
3076 }
3077 admin.deleteTable(tableName);
3078 observer.tableDeletionLatch.await();
3079 observer.tableDeletionLatch = null;
3080 }
3081 }