1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Abortable;
39 import org.apache.hadoop.hbase.ClusterStatus;
40 import org.apache.hadoop.hbase.HBaseConfiguration;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.HColumnDescriptor;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.HTableDescriptor;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.MetaTableAccessor;
48 import org.apache.hadoop.hbase.MiniHBaseCluster;
49 import org.apache.hadoop.hbase.RegionTransition;
50 import org.apache.hadoop.hbase.ServerName;
51 import org.apache.hadoop.hbase.TableName;
52 import org.apache.hadoop.hbase.TableStateManager;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.Table;
55 import org.apache.hadoop.hbase.executor.EventType;
56 import org.apache.hadoop.hbase.master.RegionState.State;
57 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58 import org.apache.hadoop.hbase.protobuf.RequestConverter;
59 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60 import org.apache.hadoop.hbase.regionserver.HRegion;
61 import org.apache.hadoop.hbase.regionserver.HRegionServer;
62 import org.apache.hadoop.hbase.regionserver.Region;
63 import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
64 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
65 import org.apache.hadoop.hbase.util.Bytes;
66 import org.apache.hadoop.hbase.util.FSTableDescriptors;
67 import org.apache.hadoop.hbase.util.FSUtils;
68 import org.apache.hadoop.hbase.util.JVMClusterUtil;
69 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
70 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
71 import org.apache.hadoop.hbase.util.Threads;
72 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
73 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
74 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
75 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
76 import org.apache.zookeeper.data.Stat;
77 import org.junit.Test;
78 import org.junit.experimental.categories.Category;
79
80 @Category(LargeTests.class)
81 public class TestMasterFailover {
/** Logger used by the tests and helper methods of this class. */
private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163 @Test (timeout=240000)
164 public void testMasterFailoverWithMockedRIT() throws Exception {
165
166 final int NUM_MASTERS = 1;
167 final int NUM_RS = 3;
168
169
170 Configuration conf = HBaseConfiguration.create();
171 conf.setBoolean("hbase.assignment.usezk", true);
172
173
174 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
175 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
176 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
177 log("Cluster started");
178
179
180 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
181
182
183 List<MasterThread> masterThreads = cluster.getMasterThreads();
184 assertEquals(1, masterThreads.size());
185
186
187 assertTrue(cluster.waitForActiveAndReadyMaster());
188 HMaster master = masterThreads.get(0).getMaster();
189 assertTrue(master.isActiveMaster());
190 assertTrue(master.isInitialized());
191
192
193 master.balanceSwitch(false);
194
195
196 byte [] FAMILY = Bytes.toBytes("family");
197 byte [][] SPLIT_KEYS = new byte [][] {
198 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
199 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
200 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
201 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
202 };
203
204 byte [] enabledTable = Bytes.toBytes("enabledTable");
205 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
206 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
207
208 FileSystem filesystem = FileSystem.get(conf);
209 Path rootdir = FSUtils.getRootDir(conf);
210 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
211
212 fstd.createTableDescriptor(htdEnabled);
213
214 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
215 createRegion(hriEnabled, rootdir, conf, htdEnabled);
216
217 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
218 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
219
220 TableName disabledTable = TableName.valueOf("disabledTable");
221 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
222 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
223
224 fstd.createTableDescriptor(htdDisabled);
225 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
226 createRegion(hriDisabled, rootdir, conf, htdDisabled);
227 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
228 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
229
230 TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
231 TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
232
233 log("Regions in hbase:meta and namespace have been created");
234
235
236
237 assertTrue(4 <= cluster.countServedRegions());
238
239
240 AssignmentManager am = master.getAssignmentManager();
241 RegionStates regionStates = am.getRegionStates();
242 List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
243 assertEquals(2, mergingRegions.size());
244 HRegionInfo a = mergingRegions.get(0);
245 HRegionInfo b = mergingRegions.get(1);
246 HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
247 ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
248 ServerName serverB = regionStates.getRegionServerOfRegion(b);
249 if (!serverB.equals(mergingServer)) {
250 RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
251 am.balance(plan);
252 assertTrue(am.waitForAssignment(b));
253 }
254
255
256 HRegionServer hrs = cluster.getRegionServer(0);
257 ServerName serverName = hrs.getServerName();
258 HRegionInfo closingRegion = enabledRegions.remove(0);
259
260 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
261 enabledAndAssignedRegions.add(enabledRegions.remove(0));
262 enabledAndAssignedRegions.add(enabledRegions.remove(0));
263 enabledAndAssignedRegions.add(closingRegion);
264
265 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
266 disabledAndAssignedRegions.add(disabledRegions.remove(0));
267 disabledAndAssignedRegions.add(disabledRegions.remove(0));
268
269
270 for (HRegionInfo hri : enabledAndAssignedRegions) {
271 master.assignmentManager.addPlan(hri.getEncodedName(),
272 new RegionPlan(hri, null, serverName));
273 master.assignRegion(hri);
274 }
275
276 for (HRegionInfo hri : disabledAndAssignedRegions) {
277 master.assignmentManager.addPlan(hri.getEncodedName(),
278 new RegionPlan(hri, null, serverName));
279 master.assignRegion(hri);
280 }
281
282
283 log("Waiting for assignment to finish");
284 ZKAssign.blockUntilNoRIT(zkw);
285 log("Assignment completed");
286
287
288 log("Aborting master");
289 cluster.abortMaster(0);
290 cluster.waitOnMaster(0);
291 log("Master has aborted");
292
293
294
295
296
297
298 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
299 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
300
301 log("Beginning to mock scenarios");
302
303
304 TableStateManager zktable = new ZKTableStateManager(zkw);
305 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
306
307
308
309
310
311
312
313
314 HRegionInfo region = enabledRegions.remove(0);
315 regionsThatShouldBeOnline.add(region);
316 ZKAssign.createNodeOffline(zkw, region, serverName);
317
318
319
320
321
322 regionsThatShouldBeOnline.add(closingRegion);
323 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
324
325
326
327
328
329
330
331 region = enabledRegions.remove(0);
332 regionsThatShouldBeOnline.add(region);
333 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
334 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
335
336
337 region = disabledRegions.remove(0);
338 regionsThatShouldBeOffline.add(region);
339 version = ZKAssign.createNodeClosing(zkw, region, serverName);
340 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
341
342
343
344
345
346
347
348 region = enabledRegions.remove(0);
349 regionsThatShouldBeOnline.add(region);
350 ZKAssign.createNodeOffline(zkw, region, serverName);
351 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
352 while (true) {
353 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
354 RegionTransition rt = RegionTransition.parseFrom(bytes);
355 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
356 break;
357 }
358 Thread.sleep(100);
359 }
360
361
362
363 region = disabledRegions.remove(0);
364 regionsThatShouldBeOffline.add(region);
365 ZKAssign.createNodeOffline(zkw, region, serverName);
366 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
367 while (true) {
368 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
369 RegionTransition rt = RegionTransition.parseFrom(bytes);
370 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
371 break;
372 }
373 Thread.sleep(100);
374 }
375
376
377
378
379
380
381
382 hrs.getCoordinatedStateManager().
383 getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
384
385
386
387
388
389
390
391
392
393 log("Done mocking data up in ZK");
394
395
396 log("Starting up a new master");
397 master = cluster.startMaster().getMaster();
398 log("Waiting for master to be ready");
399 cluster.waitForActiveAndReadyMaster();
400 log("Master is ready");
401
402
403 regionStates = master.getAssignmentManager().getRegionStates();
404
405 assertTrue(regionStates.isRegionInState(a, State.MERGING));
406 assertTrue(regionStates.isRegionInState(b, State.MERGING));
407 assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
408
409
410 ZKAssign.deleteNodeFailSilent(zkw, newRegion);
411
412
413 log("Waiting for no more RIT");
414 ZKAssign.blockUntilNoRIT(zkw);
415 log("No more RIT in ZK, now doing final test verification");
416
417
418 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
419 for (JVMClusterUtil.RegionServerThread rst :
420 cluster.getRegionServerThreads()) {
421 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
422 rst.getRegionServer().getRSRpcServices()));
423 }
424
425
426 for (HRegionInfo hri : regionsThatShouldBeOnline) {
427 assertTrue(onlineRegions.contains(hri));
428 }
429
430
431 for (HRegionInfo hri : regionsThatShouldBeOffline) {
432 if (onlineRegions.contains(hri)) {
433 LOG.debug(hri);
434 }
435 assertFalse(onlineRegions.contains(hri));
436 }
437
438 log("Done with verification, all passed, shutting down cluster");
439
440
441 TEST_UTIL.shutdownMiniCluster();
442 }
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500 @Test (timeout=180000)
501 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
502
503 final int NUM_MASTERS = 1;
504 final int NUM_RS = 2;
505
506
507 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
508 Configuration conf = TEST_UTIL.getConfiguration();
509 conf.setBoolean("hbase.assignment.usezk", true);
510
511
512 conf.setBoolean("hbase.master.start.wait.for.namespacemanager", true);
513
514 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
515 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
516 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
517 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
518 log("Cluster started");
519
520
521 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
522 "unittest", new Abortable() {
523
524 @Override
525 public void abort(String why, Throwable e) {
526 LOG.error("Fatal ZK Error: " + why, e);
527 org.junit.Assert.assertFalse("Fatal ZK error", true);
528 }
529
530 @Override
531 public boolean isAborted() {
532 return false;
533 }
534
535 });
536
537
538 List<MasterThread> masterThreads = cluster.getMasterThreads();
539 assertEquals(1, masterThreads.size());
540
541
542 assertTrue(cluster.waitForActiveAndReadyMaster());
543 HMaster master = masterThreads.get(0).getMaster();
544 assertTrue(master.isActiveMaster());
545 assertTrue(master.isInitialized());
546
547
548 master.balanceSwitch(false);
549
550
551 byte [] FAMILY = Bytes.toBytes("family");
552 byte[][] SPLIT_KEYS =
553 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
554
555 byte [] enabledTable = Bytes.toBytes("enabledTable");
556 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
557 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
558 FileSystem filesystem = FileSystem.get(conf);
559 Path rootdir = FSUtils.getRootDir(conf);
560 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
561
562 fstd.createTableDescriptor(htdEnabled);
563 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
564 null, null);
565 createRegion(hriEnabled, rootdir, conf, htdEnabled);
566
567 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
568 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
569
570 TableName disabledTable =
571 TableName.valueOf("disabledTable");
572 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
573 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
574
575 fstd.createTableDescriptor(htdDisabled);
576 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
577 createRegion(hriDisabled, rootdir, conf, htdDisabled);
578
579 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
580 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
581
582 log("Regions in hbase:meta and Namespace have been created");
583
584
585 assertTrue(2 <= cluster.countServedRegions());
586
587
588 List<RegionServerThread> regionservers =
589 cluster.getRegionServerThreads();
590 HRegionServer hrs = regionservers.get(0).getRegionServer();
591
592
593 RegionServerThread hrsDeadThread = regionservers.get(1);
594 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
595 ServerName deadServerName = hrsDead.getServerName();
596
597
598 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
599 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
600 enabledRegions.removeAll(enabledAndAssignedRegions);
601 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
602 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
603 disabledRegions.removeAll(disabledAndAssignedRegions);
604
605
606 for (HRegionInfo hri : enabledAndAssignedRegions) {
607 master.assignmentManager.addPlan(hri.getEncodedName(),
608 new RegionPlan(hri, null, hrs.getServerName()));
609 master.assignRegion(hri);
610 }
611 for (HRegionInfo hri : disabledAndAssignedRegions) {
612 master.assignmentManager.addPlan(hri.getEncodedName(),
613 new RegionPlan(hri, null, hrs.getServerName()));
614 master.assignRegion(hri);
615 }
616
617 log("Waiting for assignment to finish");
618 ZKAssign.blockUntilNoRIT(zkw);
619 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
620 log("Assignment completed");
621
622 assertTrue(" Table must be enabled.", master.getAssignmentManager()
623 .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
624 ZooKeeperProtos.Table.State.ENABLED));
625
626 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
628 enabledRegions.removeAll(enabledAndOnDeadRegions);
629 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
630 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
631 disabledRegions.removeAll(disabledAndOnDeadRegions);
632
633
634 for (HRegionInfo hri : enabledAndOnDeadRegions) {
635 master.assignmentManager.addPlan(hri.getEncodedName(),
636 new RegionPlan(hri, null, deadServerName));
637 master.assignRegion(hri);
638 }
639 for (HRegionInfo hri : disabledAndOnDeadRegions) {
640 master.assignmentManager.addPlan(hri.getEncodedName(),
641 new RegionPlan(hri, null, deadServerName));
642 master.assignRegion(hri);
643 }
644
645
646 log("Waiting for assignment to finish");
647 ZKAssign.blockUntilNoRIT(zkw);
648 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
649 log("Assignment completed");
650
651
652
653 verifyRegionLocation(hrs, enabledAndAssignedRegions);
654 verifyRegionLocation(hrs, disabledAndAssignedRegions);
655 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
656 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
657
658 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
659 enabledAndAssignedRegions.size() >= 2);
660 assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
661 disabledAndAssignedRegions.size() >= 2);
662 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
663 enabledAndOnDeadRegions.size() >= 2);
664 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
665 disabledAndOnDeadRegions.size() >= 2);
666
667
668 log("Aborting master");
669 cluster.abortMaster(0);
670 cluster.waitOnMaster(0);
671 log("Master has aborted");
672
673
674
675
676
677
678 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
679 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
680
681 log("Beginning to mock scenarios");
682
683
684 TableStateManager zktable = new ZKTableStateManager(zkw);
685 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
686
687 assertTrue(" The enabled table should be identified on master fail over.",
688 zktable.isTableState(TableName.valueOf("enabledTable"),
689 ZooKeeperProtos.Table.State.ENABLED));
690
691
692
693
694
695
696 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
697 regionsThatShouldBeOnline.add(region);
698 ZKAssign.createNodeClosing(zkw, region, deadServerName);
699 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
700 region + "\n\n");
701
702
703 region = disabledAndOnDeadRegions.remove(0);
704 regionsThatShouldBeOffline.add(region);
705 ZKAssign.createNodeClosing(zkw, region, deadServerName);
706 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
707 region + "\n\n");
708
709
710
711
712
713
714 region = enabledAndOnDeadRegions.remove(0);
715 regionsThatShouldBeOnline.add(region);
716 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
717 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
718 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
719 region + "\n\n");
720
721
722 region = disabledAndOnDeadRegions.remove(0);
723 regionsThatShouldBeOffline.add(region);
724 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
725 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
726 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
727 region + "\n\n");
728
729
730
731
732
733
734 region = enabledRegions.remove(0);
735 regionsThatShouldBeOnline.add(region);
736 ZKAssign.createNodeOffline(zkw, region, deadServerName);
737 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
738 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
739 region + "\n\n");
740
741
742 region = disabledRegions.remove(0);
743 regionsThatShouldBeOffline.add(region);
744 ZKAssign.createNodeOffline(zkw, region, deadServerName);
745 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
746 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
747 region + "\n\n");
748
749
750
751
752
753
754 region = enabledRegions.remove(0);
755 regionsThatShouldBeOnline.add(region);
756 ZKAssign.createNodeOffline(zkw, region, deadServerName);
757 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
758 hrsDead.getServerName(), region);
759 while (true) {
760 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
761 RegionTransition rt = RegionTransition.parseFrom(bytes);
762 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
763 break;
764 }
765 Thread.sleep(100);
766 }
767 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
768 region + "\n\n");
769
770
771 region = disabledRegions.remove(0);
772 regionsThatShouldBeOffline.add(region);
773 ZKAssign.createNodeOffline(zkw, region, deadServerName);
774 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
775 hrsDead.getServerName(), region);
776 while (true) {
777 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
778 RegionTransition rt = RegionTransition.parseFrom(bytes);
779 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
780 break;
781 }
782 Thread.sleep(100);
783 }
784 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
785 region + "\n\n");
786
787
788
789
790
791
792 region = enabledRegions.remove(0);
793 regionsThatShouldBeOnline.add(region);
794 ZKAssign.createNodeOffline(zkw, region, deadServerName);
795 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
796 hrsDead.getServerName(), region);
797 while (true) {
798 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
799 RegionTransition rt = RegionTransition.parseFrom(bytes);
800 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
801 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
802 LOG.debug("DELETED " + rt);
803 break;
804 }
805 Thread.sleep(100);
806 }
807 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
808 + "\n" + region + "\n\n");
809
810
811 region = disabledRegions.remove(0);
812 regionsThatShouldBeOffline.add(region);
813 ZKAssign.createNodeOffline(zkw, region, deadServerName);
814 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
815 hrsDead.getServerName(), region);
816 while (true) {
817 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
818 RegionTransition rt = RegionTransition.parseFrom(bytes);
819 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
820 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
821 break;
822 }
823 Thread.sleep(100);
824 }
825 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
826 + "\n" + region + "\n\n");
827
828
829
830
831
832 log("Done mocking data up in ZK");
833
834
835 log("Killing RS " + deadServerName);
836 hrsDead.abort("Killing for unit test");
837 log("RS " + deadServerName + " killed");
838
839
840
841 while (hrsDeadThread.isAlive()) {
842 Threads.sleep(10);
843 }
844 log("Starting up a new master");
845 master = cluster.startMaster().getMaster();
846 log("Waiting for master to be ready");
847 assertTrue(cluster.waitForActiveAndReadyMaster());
848 log("Master is ready");
849
850
851 while (master.getServerManager().areDeadServersInProgress()) {
852 Thread.sleep(10);
853 }
854
855
856 log("Waiting for no more RIT");
857 ZKAssign.blockUntilNoRIT(zkw);
858 log("No more RIT in ZK");
859 long now = System.currentTimeMillis();
860 long maxTime = 120000;
861 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
862 if (!done) {
863 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
864 LOG.info("rit=" + regionStates.getRegionsInTransition());
865 }
866 long elapsed = System.currentTimeMillis() - now;
867 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
868 elapsed < maxTime);
869 log("No more RIT in RIT map, doing final test verification");
870
871
872 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
873 now = System.currentTimeMillis();
874 maxTime = 30000;
875 for (JVMClusterUtil.RegionServerThread rst :
876 cluster.getRegionServerThreads()) {
877 try {
878 HRegionServer rs = rst.getRegionServer();
879 while (!rs.getRegionsInTransitionInRS().isEmpty()) {
880 elapsed = System.currentTimeMillis() - now;
881 assertTrue("Test timed out in getting online regions", elapsed < maxTime);
882 if (rs.isAborted() || rs.isStopped()) {
883
884 break;
885 }
886 Thread.sleep(100);
887 }
888 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
889 } catch (RegionServerStoppedException e) {
890 LOG.info("Got RegionServerStoppedException", e);
891 }
892 }
893
894
895 for (HRegionInfo hri : regionsThatShouldBeOnline) {
896 assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
897 onlineRegions.contains(hri));
898 }
899
900
901 for (HRegionInfo hri : regionsThatShouldBeOffline) {
902 assertFalse(onlineRegions.contains(hri));
903 }
904
905 log("Done with verification, all passed, shutting down cluster");
906
907
908 TEST_UTIL.shutdownMiniCluster();
909 }
910
911
912
913
914 private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
915 throws IOException {
916 List<HRegionInfo> tmpOnlineRegions =
917 ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
918 Iterator<HRegionInfo> itr = regions.iterator();
919 while (itr.hasNext()) {
920 HRegionInfo tmp = itr.next();
921 if (!tmpOnlineRegions.contains(tmp)) {
922 itr.remove();
923 }
924 }
925 }
926
927 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
928 final HTableDescriptor htd)
929 throws IOException {
930 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
931
932
933
934
935
936 HRegion.closeHRegion(r);
937 return r;
938 }
939
940
941
942
943 private void log(String string) {
944 LOG.info("\n\n" + string + " \n\n");
945 }
946
947 @Test (timeout=180000)
948 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
949 throws Exception {
950 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
951 final int NUM_MASTERS = 1;
952 final int NUM_RS = 2;
953
954
955 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
956 Configuration conf = TEST_UTIL.getConfiguration();
957 conf.setInt("hbase.master.info.port", -1);
958 conf.setBoolean("hbase.assignment.usezk", true);
959
960 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
961 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
962
963
964 List<RegionServerThread> regionServerThreads =
965 cluster.getRegionServerThreads();
966 Region metaRegion = null;
967 HRegionServer metaRegionServer = null;
968 for (RegionServerThread regionServerThread : regionServerThreads) {
969 HRegionServer regionServer = regionServerThread.getRegionServer();
970 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
971 regionServer.abort("");
972 if (null != metaRegion) {
973 metaRegionServer = regionServer;
974 break;
975 }
976 }
977
978 TEST_UTIL.shutdownMiniHBaseCluster();
979
980
981 ZooKeeperWatcher zkw =
982 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
983 metaRegion, metaRegionServer.getServerName());
984
985 LOG.info("Staring cluster for second time");
986 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
987
988 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
989 while (!master.isInitialized()) {
990 Thread.sleep(100);
991 }
992
993 log("Waiting for no more RIT");
994 ZKAssign.blockUntilNoRIT(zkw);
995
996 zkw.close();
997
998 TEST_UTIL.shutdownMiniCluster();
999 }
1000
1001
1002
1003
1004 @Test(timeout=240000)
1005 public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1006 final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1007 final int NUM_MASTERS = 1;
1008 final int NUM_RS = 2;
1009
1010
1011 Configuration conf = HBaseConfiguration.create();
1012 conf.setBoolean("hbase.assignment.usezk", true);
1013
1014
1015 final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1016 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1017 log("Cluster started");
1018
1019 TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1020 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1021 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1022 HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1023 ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1024 TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1025
1026 ServerName dstName = null;
1027 for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1028 if (!tmpServer.equals(serverName)) {
1029 dstName = tmpServer;
1030 break;
1031 }
1032 }
1033
1034 assertTrue(dstName != null);
1035
1036 TEST_UTIL.shutdownMiniHBaseCluster();
1037
1038 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1039 ZKAssign.createNodeOffline(zkw, hri, dstName);
1040 Stat stat = new Stat();
1041 byte[] data =
1042 ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1043 assertTrue(data != null);
1044 RegionTransition rt = RegionTransition.parseFrom(data);
1045 assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1046
1047 LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1048 + " and dst server=" + dstName);
1049
1050
1051 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1052
1053 while (true) {
1054 master = TEST_UTIL.getHBaseCluster().getMaster();
1055 if (master != null && master.isInitialized()) {
1056 ServerManager serverManager = master.getServerManager();
1057 if (!serverManager.areDeadServersInProgress()) {
1058 break;
1059 }
1060 }
1061 Thread.sleep(200);
1062 }
1063
1064
1065 master = TEST_UTIL.getHBaseCluster().getMaster();
1066 master.getAssignmentManager().waitForAssignment(hri);
1067 regionStates = master.getAssignmentManager().getRegionStates();
1068 RegionState newState = regionStates.getRegionState(hri);
1069 assertTrue(newState.isOpened());
1070 }
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 @Test (timeout=240000)
1081 public void testSimpleMasterFailover() throws Exception {
1082
1083 final int NUM_MASTERS = 3;
1084 final int NUM_RS = 3;
1085
1086
1087 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1088
1089 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1090 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1091
1092
1093 List<MasterThread> masterThreads = cluster.getMasterThreads();
1094
1095
1096 for (MasterThread mt : masterThreads) {
1097 assertTrue(mt.isAlive());
1098 }
1099
1100
1101 int numActive = 0;
1102 int activeIndex = -1;
1103 ServerName activeName = null;
1104 HMaster active = null;
1105 for (int i = 0; i < masterThreads.size(); i++) {
1106 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1107 numActive++;
1108 activeIndex = i;
1109 active = masterThreads.get(activeIndex).getMaster();
1110 activeName = active.getServerName();
1111 }
1112 }
1113 assertEquals(1, numActive);
1114 assertEquals(NUM_MASTERS, masterThreads.size());
1115 LOG.info("Active master " + activeName);
1116
1117
1118 assertNotNull(active);
1119 ClusterStatus status = active.getClusterStatus();
1120 assertTrue(status.getMaster().equals(activeName));
1121 assertEquals(2, status.getBackupMastersSize());
1122 assertEquals(2, status.getBackupMasters().size());
1123
1124
1125 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1126 HMaster master = cluster.getMaster(backupIndex);
1127 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1128 cluster.stopMaster(backupIndex, false);
1129 cluster.waitOnMaster(backupIndex);
1130
1131
1132 for (int i = 0; i < masterThreads.size(); i++) {
1133 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1134 assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1135 activeIndex = i;
1136 active = masterThreads.get(activeIndex).getMaster();
1137 }
1138 }
1139 assertEquals(1, numActive);
1140 assertEquals(2, masterThreads.size());
1141 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1142 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
1143 assertEquals(3, rsCount);
1144
1145
1146 assertNotNull(active);
1147 status = active.getClusterStatus();
1148 assertTrue(status.getMaster().equals(activeName));
1149 assertEquals(1, status.getBackupMastersSize());
1150 assertEquals(1, status.getBackupMasters().size());
1151
1152
1153 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1154 cluster.stopMaster(activeIndex, false);
1155 cluster.waitOnMaster(activeIndex);
1156
1157
1158 assertTrue(cluster.waitForActiveAndReadyMaster());
1159
1160 LOG.debug("\n\nVerifying backup master is now active\n");
1161
1162 assertEquals(1, masterThreads.size());
1163
1164
1165 active = masterThreads.get(0).getMaster();
1166 assertNotNull(active);
1167 status = active.getClusterStatus();
1168 ServerName mastername = status.getMaster();
1169 assertTrue(mastername.equals(active.getServerName()));
1170 assertTrue(active.isActiveMaster());
1171 assertEquals(0, status.getBackupMastersSize());
1172 assertEquals(0, status.getBackupMasters().size());
1173 int rss = status.getServersSize();
1174 LOG.info("Active master " + mastername.getServerName() + " managing " +
1175 rss + " region servers");
1176 assertEquals(3, rss);
1177
1178
1179 TEST_UTIL.shutdownMiniCluster();
1180 }
1181
1182
1183
1184
1185 @Test (timeout=180000)
1186 @SuppressWarnings("deprecation")
1187 public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1188 final int NUM_MASTERS = 1;
1189 final int NUM_RS = 1;
1190
1191
1192 Configuration conf = HBaseConfiguration.create();
1193 conf.setBoolean("hbase.assignment.usezk", false);
1194
1195
1196 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1197 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1198 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1199 log("Cluster started");
1200
1201
1202 List<MasterThread> masterThreads = cluster.getMasterThreads();
1203 assertEquals(1, masterThreads.size());
1204
1205
1206 assertTrue(cluster.waitForActiveAndReadyMaster());
1207 HMaster master = masterThreads.get(0).getMaster();
1208 assertTrue(master.isActiveMaster());
1209 assertTrue(master.isInitialized());
1210
1211
1212 Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1213 onlineTable.close();
1214
1215 HTableDescriptor offlineTable = new HTableDescriptor(
1216 TableName.valueOf(Bytes.toBytes("offlineTable")));
1217 offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1218
1219 FileSystem filesystem = FileSystem.get(conf);
1220 Path rootdir = FSUtils.getRootDir(conf);
1221 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1222 fstd.createTableDescriptor(offlineTable);
1223
1224 HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1225 createRegion(hriOffline, rootdir, conf, offlineTable);
1226 MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1227
1228 log("Regions in hbase:meta and namespace have been created");
1229
1230
1231
1232 assertTrue(3 <= cluster.countServedRegions());
1233 HRegionInfo hriOnline = null;
1234 try (RegionLocator locator =
1235 TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1236 hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1237 }
1238 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1239 RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1240
1241
1242
1243 RegionState oldState = regionStates.getRegionState(hriOnline);
1244 RegionState newState = new RegionState(
1245 hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1246 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1247
1248
1249
1250 oldState = new RegionState(hriOffline, State.OFFLINE);
1251 newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1252 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1253
1254 HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1255 createRegion(failedClose, rootdir, conf, offlineTable);
1256 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1257
1258 oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1259 newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1260 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1261
1262
1263 HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1264 createRegion(failedOpen, rootdir, conf, offlineTable);
1265 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1266
1267
1268
1269 oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1270 newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1271 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1272
1273 HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1274 createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1275 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1276
1277
1278
1279 oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1280 newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1281 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1282
1283
1284
1285
1286 log("Aborting master");
1287 cluster.abortMaster(0);
1288 cluster.waitOnMaster(0);
1289 log("Master has aborted");
1290
1291
1292 log("Starting up a new master");
1293 master = cluster.startMaster().getMaster();
1294 log("Waiting for master to be ready");
1295 cluster.waitForActiveAndReadyMaster();
1296 log("Master is ready");
1297
1298
1299 master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1300
1301
1302 regionStates = master.getAssignmentManager().getRegionStates();
1303
1304
1305 assertTrue(regionStates.isRegionOnline(hriOffline));
1306 assertTrue(regionStates.isRegionOnline(hriOnline));
1307 assertTrue(regionStates.isRegionOnline(failedClose));
1308 assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1309 assertTrue(regionStates.isRegionOnline(failedOpen));
1310
1311 log("Done with verification, shutting down cluster");
1312
1313
1314 TEST_UTIL.shutdownMiniCluster();
1315 }
1316
1317
1318
1319
1320 @Test(timeout = 180000)
1321 public void testMetaInTransitionWhenMasterFailover() throws Exception {
1322 final int NUM_MASTERS = 1;
1323 final int NUM_RS = 1;
1324
1325
1326 Configuration conf = HBaseConfiguration.create();
1327 conf.setBoolean("hbase.assignment.usezk", false);
1328 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1329 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1330 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1331 log("Cluster started");
1332
1333 log("Moving meta off the master");
1334 HMaster activeMaster = cluster.getMaster();
1335 HRegionServer rs = cluster.getRegionServer(0);
1336 ServerName metaServerName = cluster.getLiveRegionServerThreads()
1337 .get(0).getRegionServer().getServerName();
1338 activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1339 Bytes.toBytes(metaServerName.getServerName()));
1340 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1341 assertEquals("Meta should be assigned on expected regionserver",
1342 metaServerName, activeMaster.getMetaTableLocator()
1343 .getMetaRegionLocation(activeMaster.getZooKeeper()));
1344
1345
1346 log("Aborting master");
1347 activeMaster.abort("test-kill");
1348 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1349 log("Master has aborted");
1350
1351
1352 RegionState metaState =
1353 MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1354 assertEquals("hbase:meta should be onlined on RS",
1355 metaState.getServerName(), rs.getServerName());
1356 assertEquals("hbase:meta should be onlined on RS",
1357 metaState.getState(), State.OPEN);
1358
1359
1360 log("Starting up a new master");
1361 activeMaster = cluster.startMaster().getMaster();
1362 log("Waiting for master to be ready");
1363 cluster.waitForActiveAndReadyMaster();
1364 log("Master is ready");
1365
1366
1367 metaState =
1368 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1369 assertEquals("hbase:meta should be onlined on RS",
1370 metaState.getServerName(), rs.getServerName());
1371 assertEquals("hbase:meta should be onlined on RS",
1372 metaState.getState(), State.OPEN);
1373
1374
1375
1376
1377
1378 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1379 rs.getServerName(), State.PENDING_OPEN);
1380 Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1381 rs.removeFromOnlineRegions(meta, null);
1382 ((HRegion)meta).close();
1383
1384 log("Aborting master");
1385 activeMaster.abort("test-kill");
1386 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1387 log("Master has aborted");
1388
1389
1390 log("Starting up a new master");
1391 activeMaster = cluster.startMaster().getMaster();
1392 log("Waiting for master to be ready");
1393 cluster.waitForActiveAndReadyMaster();
1394 log("Master is ready");
1395
1396 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1397 log("Meta was assigned");
1398
1399 metaState =
1400 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1401 assertEquals("hbase:meta should be onlined on RS",
1402 metaState.getServerName(), rs.getServerName());
1403 assertEquals("hbase:meta should be onlined on RS",
1404 metaState.getState(), State.OPEN);
1405
1406
1407
1408
1409
1410 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1411 rs.getServerName(), State.PENDING_CLOSE);
1412
1413 log("Aborting master");
1414 activeMaster.abort("test-kill");
1415 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1416 log("Master has aborted");
1417
1418 rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1419 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1420
1421
1422 log("Starting up a new master");
1423 activeMaster = cluster.startMaster().getMaster();
1424 log("Waiting for master to be ready");
1425 cluster.waitForActiveAndReadyMaster();
1426 log("Master is ready");
1427
1428 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1429 log("Meta was assigned");
1430
1431 rs.getRSRpcServices().closeRegion(
1432 null,
1433 RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1434 HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1435
1436
1437 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1438 ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1439
1440 log("Aborting master");
1441 activeMaster.stop("test-kill");
1442
1443 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1444 log("Master has aborted");
1445
1446
1447 log("Starting up a new master");
1448 activeMaster = cluster.startMaster().getMaster();
1449 log("Waiting for master to be ready");
1450 cluster.waitForActiveAndReadyMaster();
1451 log("Master is ready");
1452
1453 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1454 log("Meta was assigned");
1455
1456
1457 TEST_UTIL.shutdownMiniCluster();
1458 }
1459 }
1460