1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.TreeSet;
32  
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FileSystem;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.Abortable;
39  import org.apache.hadoop.hbase.ClusterStatus;
40  import org.apache.hadoop.hbase.HBaseConfiguration;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.testclassification.LargeTests;
47  import org.apache.hadoop.hbase.MetaTableAccessor;
48  import org.apache.hadoop.hbase.MiniHBaseCluster;
49  import org.apache.hadoop.hbase.RegionTransition;
50  import org.apache.hadoop.hbase.ServerName;
51  import org.apache.hadoop.hbase.TableName;
52  import org.apache.hadoop.hbase.TableStateManager;
53  import org.apache.hadoop.hbase.client.RegionLocator;
54  import org.apache.hadoop.hbase.client.Table;
55  import org.apache.hadoop.hbase.executor.EventType;
56  import org.apache.hadoop.hbase.master.RegionState.State;
57  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
58  import org.apache.hadoop.hbase.protobuf.RequestConverter;
59  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
60  import org.apache.hadoop.hbase.regionserver.HRegion;
61  import org.apache.hadoop.hbase.regionserver.HRegionServer;
62  import org.apache.hadoop.hbase.regionserver.Region;
63  import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
64  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
65  import org.apache.hadoop.hbase.util.Bytes;
66  import org.apache.hadoop.hbase.util.FSTableDescriptors;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.hbase.util.JVMClusterUtil;
69  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
70  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
71  import org.apache.hadoop.hbase.util.Threads;
72  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
73  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
74  import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
75  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
76  import org.apache.zookeeper.data.Stat;
77  import org.junit.Test;
78  import org.junit.experimental.categories.Category;
79  
80  @Category(LargeTests.class)
81  public class TestMasterFailover {
82    private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
83  
84    /**
85     * Complex test of master failover that tests as many permutations of the
86     * different possible states that regions in transition could be in within ZK.
87     * <p>
88     * This tests the proper handling of these states by the failed-over master
89     * and includes a thorough testing of the timeout code as well.
90     * <p>
91     * Starts with a single master and three regionservers.
92     * <p>
93     * Creates two tables, enabledTable and disabledTable, each containing 5
94     * regions.  The disabledTable is then disabled.
95     * <p>
96     * After reaching steady-state, the master is killed.  We then mock several
97     * states in ZK.
98     * <p>
99     * After mocking them, we will startup a new master which should become the
100    * active master and also detect that it is a failover.  The primary test
101    * passing condition will be that all regions of the enabled table are
102    * assigned and all the regions of the disabled table are not assigned.
103    * <p>
104    * The different scenarios to be tested are below:
105    * <p>
106    * <b>ZK State:  OFFLINE</b>
107    * <p>A node can get into OFFLINE state if</p>
108    * <ul>
109    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
110  * <li>The Master has started assigning the region to an RS but has not yet sent the open RPC
111    * </ul>
112    * <p>We will mock the scenarios</p>
113    * <ul>
114    * <li>Master has assigned an enabled region but RS failed so a region is
115    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
116  * <li>A single mocked OFFLINE node covers both of the above causes, since the resulting ZK state is the same</li>
117    * </ul>
118    * <p>
119    * <b>ZK State:  CLOSING</b>
120    * <p>A node can get into CLOSING state if</p>
121    * <ul>
122    * <li>An RS has begun to close a region
123    * </ul>
124    * <p>We will mock the scenarios</p>
125    * <ul>
126    * <li>Region of enabled table was being closed but did not complete
127    * <li>Region of disabled table was being closed but did not complete
128    * </ul>
129    * <p>
130    * <b>ZK State:  CLOSED</b>
131    * <p>A node can get into CLOSED state if</p>
132    * <ul>
133  * <li>An RS has completed closing a region but the master has not yet acknowledged it
134    * </ul>
135    * <p>We will mock the scenarios</p>
136    * <ul>
137    * <li>Region of a table that should be enabled was closed on an RS
138    * <li>Region of a table that should be disabled was closed on an RS
139    * </ul>
140    * <p>
141    * <b>ZK State:  OPENING</b>
142    * <p>A node can get into OPENING state if</p>
143    * <ul>
144    * <li>An RS has begun to open a region
145    * </ul>
146    * <p>We will mock the scenarios</p>
147    * <ul>
148  * <li>An RS was opening a region of the enabled table but never finished
149    * </ul>
150    * <p>
151    * <b>ZK State:  OPENED</b>
152    * <p>A node can get into OPENED state if</p>
153    * <ul>
154  * <li>An RS has finished opening a region but the master has not yet acknowledged it
155    * </ul>
156    * <p>We will mock the scenarios</p>
157    * <ul>
158    * <li>Region of a table that should be enabled was opened on an RS
159    * <li>Region of a table that should be disabled was opened on an RS
160    * </ul>
161    * @throws Exception
162    */
163   @Test (timeout=240000)
164   public void testMasterFailoverWithMockedRIT() throws Exception {
165 
166     final int NUM_MASTERS = 1;
167     final int NUM_RS = 3;
168 
169     // Create config to use for this cluster
170     Configuration conf = HBaseConfiguration.create();
171     conf.setBoolean("hbase.assignment.usezk", true);
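    // "hbase.assignment.usezk" keeps region assignment state in ZooKeeper (rather than using
    // ZK-less assignment), which is what lets this test mock region-in-transition znodes below.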
172 
173     // Start the cluster
174     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
175     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
176     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
177     log("Cluster started");
178 
179     // Create a ZKW to use in the test
180     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
181 
182     // get all the master threads
183     List<MasterThread> masterThreads = cluster.getMasterThreads();
184     assertEquals(1, masterThreads.size());
185 
186     // only one master thread, let's wait for it to be initialized
187     assertTrue(cluster.waitForActiveAndReadyMaster());
188     HMaster master = masterThreads.get(0).getMaster();
189     assertTrue(master.isActiveMaster());
190     assertTrue(master.isInitialized());
191 
192     // disable load balancing on this master
193     master.balanceSwitch(false);
194 
195     // create two tables in META, each with 10 regions
196     byte [] FAMILY = Bytes.toBytes("family");
197     byte [][] SPLIT_KEYS = new byte [][] {
198         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
199         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
200         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
201         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
202     };
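    // Each split key above becomes the start key of one region when createMultiRegionsInMeta
    // is called below; those regions exist only in hbase:meta and are not assigned yet.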
203 
204     byte [] enabledTable = Bytes.toBytes("enabledTable");
205     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
206     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
207 
208     FileSystem filesystem = FileSystem.get(conf);
209     Path rootdir = FSUtils.getRootDir(conf);
210     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
211     // Write the .tableinfo
212     fstd.createTableDescriptor(htdEnabled);
213 
214     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
215     createRegion(hriEnabled, rootdir, conf, htdEnabled);
216 
217     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
218         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
219 
220     TableName disabledTable = TableName.valueOf("disabledTable");
221     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
222     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
223     // Write the .tableinfo
224     fstd.createTableDescriptor(htdDisabled);
225     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
226     createRegion(hriDisabled, rootdir, conf, htdDisabled);
227     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
228         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
229 
230     TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
231     TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
232 
233     log("Regions in hbase:meta and namespace have been created");
234 
235     // at this point we expect at least 4 regions to be assigned out
236     // (meta and namespace, + 2 merging regions)
237     assertTrue(4 <= cluster.countServedRegions());
238 
239     // Move merging regions to the same region server
240     AssignmentManager am = master.getAssignmentManager();
241     RegionStates regionStates = am.getRegionStates();
242     List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
243     assertEquals(2, mergingRegions.size());
244     HRegionInfo a = mergingRegions.get(0);
245     HRegionInfo b = mergingRegions.get(1);
246     HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
247     ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
248     ServerName serverB = regionStates.getRegionServerOfRegion(b);
249     if (!serverB.equals(mergingServer)) {
250       RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
251       am.balance(plan);
252       assertTrue(am.waitForAssignment(b));
253     }
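    // Both merging regions now live on mergingServer; a region merge can only be performed
    // when both parent regions are hosted by the same region server.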
254 
255     // Let's just assign everything to first RS
256     HRegionServer hrs = cluster.getRegionServer(0);
257     ServerName serverName = hrs.getServerName();
258     HRegionInfo closingRegion = enabledRegions.remove(0);
259     // we'll need some regions to already be assigned out properly on live RS
260     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
261     enabledAndAssignedRegions.add(enabledRegions.remove(0));
262     enabledAndAssignedRegions.add(enabledRegions.remove(0));
263     enabledAndAssignedRegions.add(closingRegion);
264 
265     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
266     disabledAndAssignedRegions.add(disabledRegions.remove(0));
267     disabledAndAssignedRegions.add(disabledRegions.remove(0));
268 
269     // now actually assign them
270     for (HRegionInfo hri : enabledAndAssignedRegions) {
271       master.assignmentManager.addPlan(hri.getEncodedName(),
272           new RegionPlan(hri, null, serverName));
273       master.assignRegion(hri);
274     }
275 
276     for (HRegionInfo hri : disabledAndAssignedRegions) {
277       master.assignmentManager.addPlan(hri.getEncodedName(),
278           new RegionPlan(hri, null, serverName));
279       master.assignRegion(hri);
280     }
281 
282     // wait for no more RIT
283     log("Waiting for assignment to finish");
284     ZKAssign.blockUntilNoRIT(zkw);
285     log("Assignment completed");
286 
287     // Stop the master
288     log("Aborting master");
289     cluster.abortMaster(0);
290     cluster.waitOnMaster(0);
291     log("Master has aborted");
292 
293     /*
294      * Now, let's start mocking up some weird states as described in the method
295      * javadoc.
296      */
297 
298     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
299     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
300 
301     log("Beginning to mock scenarios");
302 
303     // Disable the disabledTable in ZK
304     TableStateManager zktable = new ZKTableStateManager(zkw);
305     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
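    // The ZK table state is what the failed-over master consults to decide that regions of
    // disabledTable must not be brought online.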
306 
307     /*
308      *  ZK = OFFLINE
309      */
310 
311     // Region that should be assigned but is not and is in ZK as OFFLINE
312     // Cause: This can happen if the master crashed after creating the znode but before sending the
313     //  request to the region server
314     HRegionInfo region = enabledRegions.remove(0);
315     regionsThatShouldBeOnline.add(region);
316     ZKAssign.createNodeOffline(zkw, region, serverName);
317 
318     /*
319      * ZK = CLOSING
320      */
321     // Cause: Same as offline.
322     regionsThatShouldBeOnline.add(closingRegion);
323     ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
324 
325     /*
326      * ZK = CLOSED
327      */
328 
329     // Region of enabled table closed but not ack
330     // Cause: Master was down while the region server updated the ZK status.
331     region = enabledRegions.remove(0);
332     regionsThatShouldBeOnline.add(region);
333     int version = ZKAssign.createNodeClosing(zkw, region, serverName);
334     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
335 
336     // Region of disabled table closed but not ack
337     region = disabledRegions.remove(0);
338     regionsThatShouldBeOffline.add(region);
339     version = ZKAssign.createNodeClosing(zkw, region, serverName);
340     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
341 
342     /*
343      * ZK = OPENED
344      */
345 
346     // Region of enabled table was opened on RS
347     // Cause: same as OFFLINE above.
348     region = enabledRegions.remove(0);
349     regionsThatShouldBeOnline.add(region);
350     ZKAssign.createNodeOffline(zkw, region, serverName);
351     ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
352     while (true) {
353       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
354       RegionTransition rt = RegionTransition.parseFrom(bytes);
355       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
356         break;
357       }
358       Thread.sleep(100);
359     }
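    // The znode is now RS_ZK_REGION_OPENED with no live master to acknowledge it; this is the
    // unacknowledged OPENED state the new master has to resolve.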
360 
361     // Region of disabled table was opened on RS
362     // Cause: Master failed while updating the status for this region server.
363     region = disabledRegions.remove(0);
364     regionsThatShouldBeOffline.add(region);
365     ZKAssign.createNodeOffline(zkw, region, serverName);
366     ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
367     while (true) {
368       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
369       RegionTransition rt = RegionTransition.parseFrom(bytes);
370       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
371         break;
372       }
373       Thread.sleep(100);
374     }
375 
376     /*
377      * ZK = MERGING
378      */
379 
380     // Regions of table of merging regions
381     // Cause: Master was down while merging was going on
382     hrs.getCoordinatedStateManager().
383       getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
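    // This creates the MERGING znode as if a merge of a and b into newRegion was in flight
    // when the master went down.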
384 
385     /*
386      * ZK = NONE
387      */
388 
389     /*
390      * DONE MOCKING
391      */
392 
393     log("Done mocking data up in ZK");
394 
395     // Start up a new master
396     log("Starting up a new master");
397     master = cluster.startMaster().getMaster();
398     log("Waiting for master to be ready");
399     cluster.waitForActiveAndReadyMaster();
400     log("Master is ready");
401 
402     // Get new region states since master restarted
403     regionStates = master.getAssignmentManager().getRegionStates();
404     // Merging region should remain merging
405     assertTrue(regionStates.isRegionInState(a, State.MERGING));
406     assertTrue(regionStates.isRegionInState(b, State.MERGING));
407     assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
408     // Now remove the faked merging znode, merging regions should be
409     // offlined automatically, otherwise it is a bug in AM.
410     ZKAssign.deleteNodeFailSilent(zkw, newRegion);
411 
412     // Failover should be completed, now wait for no RIT
413     log("Waiting for no more RIT");
414     ZKAssign.blockUntilNoRIT(zkw);
415     log("No more RIT in ZK, now doing final test verification");
416 
417     // Grab all the regions that are online across RSs
418     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
419     for (JVMClusterUtil.RegionServerThread rst :
420       cluster.getRegionServerThreads()) {
421       onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
422         rst.getRegionServer().getRSRpcServices()));
423     }
424 
425     // Now, everything that should be online should be online
426     for (HRegionInfo hri : regionsThatShouldBeOnline) {
427       assertTrue(onlineRegions.contains(hri));
428     }
429 
430     // Everything that should be offline should not be online
431     for (HRegionInfo hri : regionsThatShouldBeOffline) {
432       if (onlineRegions.contains(hri)) {
433        LOG.debug(hri);
434       }
435       assertFalse(onlineRegions.contains(hri));
436     }
437 
438     log("Done with verification, all passed, shutting down cluster");
439 
440     // Done, shutdown the cluster
441     TEST_UTIL.shutdownMiniCluster();
442   }
443 
444   /**
445    * Complex test of master failover that tests as many permutations of the
446    * different possible states that regions in transition could be in within ZK
447    * pointing to an RS that has died while no master is around to process it.
448    * <p>
449    * This tests the proper handling of these states by the failed-over master
450    * and includes a thorough testing of the timeout code as well.
451    * <p>
452    * Starts with a single master and two regionservers.
453    * <p>
454    * Creates two tables, enabledTable and disabledTable, each containing 5
455    * regions.  The disabledTable is then disabled.
456    * <p>
457    * After reaching steady-state, the master is killed.  We then mock several
458    * states in ZK.  And one of the RS will be killed.
459    * <p>
460    * After mocking them and killing an RS, we will startup a new master which
461    * should become the active master and also detect that it is a failover.  The
462    * primary test passing condition will be that all regions of the enabled
463    * table are assigned and all the regions of the disabled table are not
464    * assigned.
465    * <p>
466    * The different scenarios to be tested are below:
467    * <p>
468    * <b>ZK State:  CLOSING</b>
469    * <p>A node can get into CLOSING state if</p>
470    * <ul>
471    * <li>An RS has begun to close a region
472    * </ul>
473    * <p>We will mock the scenarios</p>
474    * <ul>
475    * <li>Region was being closed but the RS died before finishing the close
476    * </ul>
477    * <b>ZK State:  OPENED</b>
478    * <p>A node can get into OPENED state if</p>
479    * <ul>
480  * <li>An RS has finished opening a region but the master has not yet acknowledged it
481    * </ul>
482    * <p>We will mock the scenarios</p>
483    * <ul>
484    * <li>Region of a table that should be enabled was opened by a now-dead RS
485    * <li>Region of a table that should be disabled was opened by a now-dead RS
486    * </ul>
487    * <p>
488    * <b>ZK State:  NONE</b>
489  * <p>A region may have no transition node at all if</p>
490    * <ul>
491    * <li>The server hosting the region died and no master processed it
492    * </ul>
493    * <p>We will mock the scenarios</p>
494    * <ul>
495    * <li>Region of enabled table was on a dead RS that was not yet processed
496    * <li>Region of disabled table was on a dead RS that was not yet processed
497    * </ul>
498    * @throws Exception
499    */
500   @Test (timeout=180000)
501   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
502 
503     final int NUM_MASTERS = 1;
504     final int NUM_RS = 2;
505 
506     // Create and start the cluster
507     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
508     Configuration conf = TEST_UTIL.getConfiguration();
509     conf.setBoolean("hbase.assignment.usezk", true);
510     // The test depends on the namespace region being online, therefore we have to
511     // wait for the namespace manager to start.
512     conf.setBoolean("hbase.master.start.wait.for.namespacemanager", true);
513 
514     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
515     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
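    // Let the master finish waiting for region servers once a single RS has checked in
    // (min=1) and stop waiting as soon as two have (max=2).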
516     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
517     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
518     log("Cluster started");
519 
520     // Create a ZKW to use in the test
521     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
522         "unittest", new Abortable() {
523 
524           @Override
525           public void abort(String why, Throwable e) {
526             LOG.error("Fatal ZK Error: " + why, e);
527             org.junit.Assert.assertFalse("Fatal ZK error", true);
528           }
529 
530           @Override
531           public boolean isAborted() {
532             return false;
533           }
534 
535     });
536 
537     // get all the master threads
538     List<MasterThread> masterThreads = cluster.getMasterThreads();
539     assertEquals(1, masterThreads.size());
540 
541     // only one master thread, let's wait for it to be initialized
542     assertTrue(cluster.waitForActiveAndReadyMaster());
543     HMaster master = masterThreads.get(0).getMaster();
544     assertTrue(master.isActiveMaster());
545     assertTrue(master.isInitialized());
546 
547     // disable load balancing on this master
548     master.balanceSwitch(false);
549 
550     // create two tables in META, each with 30 regions
551     byte [] FAMILY = Bytes.toBytes("family");
552     byte[][] SPLIT_KEYS =
553         TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
554 
555     byte [] enabledTable = Bytes.toBytes("enabledTable");
556     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
557     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
558     FileSystem filesystem = FileSystem.get(conf);
559     Path rootdir = FSUtils.getRootDir(conf);
560     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
561     // Write the .tableinfo
562     fstd.createTableDescriptor(htdEnabled);
563     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
564         null, null);
565     createRegion(hriEnabled, rootdir, conf, htdEnabled);
566 
567     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
568         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
569 
570     TableName disabledTable =
571         TableName.valueOf("disabledTable");
572     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
573     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
574     // Write the .tableinfo
575     fstd.createTableDescriptor(htdDisabled);
576     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
577     createRegion(hriDisabled, rootdir, conf, htdDisabled);
578 
579     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
580         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
581 
582     log("Regions in hbase:meta and Namespace have been created");
583 
584     // at this point we expect at least 2 regions to be assigned out (meta and namespace)
585     assertTrue(2 <= cluster.countServedRegions());
586 
587     // The first RS will stay online
588     List<RegionServerThread> regionservers =
589       cluster.getRegionServerThreads();
590     HRegionServer hrs = regionservers.get(0).getRegionServer();
591 
592     // The second RS is going to be hard-killed
593     RegionServerThread hrsDeadThread = regionservers.get(1);
594     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
595     ServerName deadServerName = hrsDead.getServerName();
596 
597     // we'll need some regions to already be assigned out properly on live RS
598     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
599     enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
600     enabledRegions.removeAll(enabledAndAssignedRegions);
601     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
602     disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
603     disabledRegions.removeAll(disabledAndAssignedRegions);
604 
605     // now actually assign them
606     for (HRegionInfo hri : enabledAndAssignedRegions) {
607       master.assignmentManager.addPlan(hri.getEncodedName(),
608           new RegionPlan(hri, null, hrs.getServerName()));
609       master.assignRegion(hri);
610     }
611     for (HRegionInfo hri : disabledAndAssignedRegions) {
612       master.assignmentManager.addPlan(hri.getEncodedName(),
613           new RegionPlan(hri, null, hrs.getServerName()));
614       master.assignRegion(hri);
615     }
616 
617     log("Waiting for assignment to finish");
618     ZKAssign.blockUntilNoRIT(zkw);
619     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
620     log("Assignment completed");
621 
622     assertTrue(" Table must be enabled.", master.getAssignmentManager()
623         .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
624         ZooKeeperProtos.Table.State.ENABLED));
625     // we also need regions assigned out on the dead server
626     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627     enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
628     enabledRegions.removeAll(enabledAndOnDeadRegions);
629     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
630     disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
631     disabledRegions.removeAll(disabledAndOnDeadRegions);
632 
633     // set region plan to server to be killed and trigger assign
634     for (HRegionInfo hri : enabledAndOnDeadRegions) {
635       master.assignmentManager.addPlan(hri.getEncodedName(),
636           new RegionPlan(hri, null, deadServerName));
637       master.assignRegion(hri);
638     }
639     for (HRegionInfo hri : disabledAndOnDeadRegions) {
640       master.assignmentManager.addPlan(hri.getEncodedName(),
641           new RegionPlan(hri, null, deadServerName));
642       master.assignRegion(hri);
643     }
644 
645     // wait for no more RIT
646     log("Waiting for assignment to finish");
647     ZKAssign.blockUntilNoRIT(zkw);
648     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
649     log("Assignment completed");
650 
651     // Because master.assignRegion(hri) may fail to assign a region to the specified RS,
652     // we need to make sure the regions really are on the expected RS
653     verifyRegionLocation(hrs, enabledAndAssignedRegions);
654     verifyRegionLocation(hrs, disabledAndAssignedRegions);
655     verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
656     verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
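    // verifyRegionLocation() prunes any region that did not actually land on the intended
    // server, so the size assertions below only count regions usable for the mocked scenarios.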
657 
658     assertTrue(" Didn't get enough regions of enabledTable on live rs.",
659       enabledAndAssignedRegions.size() >= 2);
660     assertTrue(" Didn't get enough regions of disabledTable on live rs.",
661       disabledAndAssignedRegions.size() >= 2);
662     assertTrue(" Didn't get enough regions of enabledTable on dead rs.",
663       enabledAndOnDeadRegions.size() >= 2);
664     assertTrue(" Didn't get enough regions of disabledTable on dead rs.",
665       disabledAndOnDeadRegions.size() >= 2);
666 
667     // Stop the master
668     log("Aborting master");
669     cluster.abortMaster(0);
670     cluster.waitOnMaster(0);
671     log("Master has aborted");
672 
673     /*
674      * Now, let's start mocking up some weird states as described in the method
675      * javadoc.
676      */
677 
678     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
679     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
680 
681     log("Beginning to mock scenarios");
682 
683     // Disable the disabledTable in ZK
684     TableStateManager zktable = new ZKTableStateManager(zkw);
685     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
686 
687     assertTrue(" The enabled table should be identified on master fail over.",
688         zktable.isTableState(TableName.valueOf("enabledTable"),
689           ZooKeeperProtos.Table.State.ENABLED));
690 
691     /*
692      * ZK = CLOSING
693      */
694 
695     // Region of enabled table being closed on dead RS but not finished
696     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
697     regionsThatShouldBeOnline.add(region);
698     ZKAssign.createNodeClosing(zkw, region, deadServerName);
699     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
700         region + "\n\n");
701 
702     // Region of disabled table being closed on dead RS but not finished
703     region = disabledAndOnDeadRegions.remove(0);
704     regionsThatShouldBeOffline.add(region);
705     ZKAssign.createNodeClosing(zkw, region, deadServerName);
706     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
707         region + "\n\n");
708 
709     /*
710      * ZK = CLOSED
711      */
712 
713     // Region of enabled table on dead server gets closed but not ack'd by master
714     region = enabledAndOnDeadRegions.remove(0);
715     regionsThatShouldBeOnline.add(region);
716     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
717     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
718     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
719         region + "\n\n");
720 
721     // Region of disabled table on dead server gets closed but not ack'd by master
722     region = disabledAndOnDeadRegions.remove(0);
723     regionsThatShouldBeOffline.add(region);
724     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
725     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
726     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
727         region + "\n\n");
728 
729     /*
730      * ZK = OPENING
731      */
732 
733     // RS was opening a region of enabled table then died
734     region = enabledRegions.remove(0);
735     regionsThatShouldBeOnline.add(region);
736     ZKAssign.createNodeOffline(zkw, region, deadServerName);
737     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
738     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
739         region + "\n\n");
740 
741     // RS was opening a region of disabled table then died
742     region = disabledRegions.remove(0);
743     regionsThatShouldBeOffline.add(region);
744     ZKAssign.createNodeOffline(zkw, region, deadServerName);
745     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
746     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
747         region + "\n\n");
748 
749     /*
750      * ZK = OPENED
751      */
752 
753     // Region of enabled table was opened on dead RS
754     region = enabledRegions.remove(0);
755     regionsThatShouldBeOnline.add(region);
756     ZKAssign.createNodeOffline(zkw, region, deadServerName);
757     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
758       hrsDead.getServerName(), region);
759     while (true) {
760       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
761       RegionTransition rt = RegionTransition.parseFrom(bytes);
762       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
763         break;
764       }
765       Thread.sleep(100);
766     }
767     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
768         region + "\n\n");
769 
770     // Region of disabled table was opened on dead RS
771     region = disabledRegions.remove(0);
772     regionsThatShouldBeOffline.add(region);
773     ZKAssign.createNodeOffline(zkw, region, deadServerName);
774     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
775       hrsDead.getServerName(), region);
776     while (true) {
777       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
778       RegionTransition rt = RegionTransition.parseFrom(bytes);
779       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
780         break;
781       }
782       Thread.sleep(100);
783     }
784     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
785         region + "\n\n");
786 
787     /*
788      * ZK = NONE
789      */
790 
791     // Region of enabled table was open at steady-state on dead RS
792     region = enabledRegions.remove(0);
793     regionsThatShouldBeOnline.add(region);
794     ZKAssign.createNodeOffline(zkw, region, deadServerName);
795     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
796       hrsDead.getServerName(), region);
797     while (true) {
798       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
799       RegionTransition rt = RegionTransition.parseFrom(bytes);
800       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
801         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
802         LOG.debug("DELETED " + rt);
803         break;
804       }
805       Thread.sleep(100);
806     }
807     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
808         + "\n" + region + "\n\n");
809 
810     // Region of disabled table was open at steady-state on dead RS
811     region = disabledRegions.remove(0);
812     regionsThatShouldBeOffline.add(region);
813     ZKAssign.createNodeOffline(zkw, region, deadServerName);
814     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
815       hrsDead.getServerName(), region);
816     while (true) {
817       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
818       RegionTransition rt = RegionTransition.parseFrom(bytes);
819       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
820         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
821         break;
822       }
823       Thread.sleep(100);
824     }
825     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
826       + "\n" + region + "\n\n");
827 
828     /*
829      * DONE MOCKING
830      */
831 
832     log("Done mocking data up in ZK");
833 
834     // Kill the RS that had a hard death
835     log("Killing RS " + deadServerName);
836     hrsDead.abort("Killing for unit test");
837     log("RS " + deadServerName + " killed");
838 
839     // Start up a new master.  Wait until regionserver is completely down
840     // before starting new master because of hbase-4511.
841     while (hrsDeadThread.isAlive()) {
842       Threads.sleep(10);
843     }
844     log("Starting up a new master");
845     master = cluster.startMaster().getMaster();
846     log("Waiting for master to be ready");
847     assertTrue(cluster.waitForActiveAndReadyMaster());
848     log("Master is ready");
849 
850     // Wait until SSH processing completed for dead server.
851     while (master.getServerManager().areDeadServersInProgress()) {
852       Thread.sleep(10);
853     }
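    // areDeadServersInProgress() turns false once server-shutdown (SSH) processing of the
    // dead region server has finished, i.e. its regions have been reassigned or offlined.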
854 
855     // Failover should be completed, now wait for no RIT
856     log("Waiting for no more RIT");
857     ZKAssign.blockUntilNoRIT(zkw);
858     log("No more RIT in ZK");
859     long now = System.currentTimeMillis();
860     long maxTime = 120000;
861     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
862     if (!done) {
863       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
864       LOG.info("rit=" + regionStates.getRegionsInTransition());
865     }
866     long elapsed = System.currentTimeMillis() - now;
867     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
868       elapsed < maxTime);
869     log("No more RIT in RIT map, doing final test verification");
870 
871     // Grab all the regions that are online across RSs
872     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
873     now = System.currentTimeMillis();
874     maxTime = 30000;
875     for (JVMClusterUtil.RegionServerThread rst :
876         cluster.getRegionServerThreads()) {
877       try {
878         HRegionServer rs = rst.getRegionServer();
879         while (!rs.getRegionsInTransitionInRS().isEmpty()) {
880           elapsed = System.currentTimeMillis() - now;
881           assertTrue("Test timed out in getting online regions", elapsed < maxTime);
882           if (rs.isAborted() || rs.isStopped()) {
883             // This region server is stopped, skip it.
884             break;
885           }
886           Thread.sleep(100);
887         }
888         onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
889       } catch (RegionServerStoppedException e) {
890         LOG.info("Got RegionServerStoppedException", e);
891       }
892     }
893 
894     // Now, everything that should be online should be online
895     for (HRegionInfo hri : regionsThatShouldBeOnline) {
896       assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
897         onlineRegions.contains(hri));
898     }
899 
900     // Everything that should be offline should not be online
901     for (HRegionInfo hri : regionsThatShouldBeOffline) {
902       assertFalse(onlineRegions.contains(hri));
903     }
904 
905     log("Done with verification, all passed, shutting down cluster");
906 
907     // Done, shutdown the cluster
908     TEST_UTIL.shutdownMiniCluster();
909   }
910 
911   /**
912    * Removes from the given list any region that is not actually online on the expected region server
913    */
914   private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
915       throws IOException {
916     List<HRegionInfo> tmpOnlineRegions =
917       ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
918     Iterator<HRegionInfo> itr = regions.iterator();
919     while (itr.hasNext()) {
920       HRegionInfo tmp = itr.next();
921       if (!tmpOnlineRegions.contains(tmp)) {
922         itr.remove();
923       }
924     }
925   }
926 
927   HRegion createRegion(final HRegionInfo  hri, final Path rootdir, final Configuration c,
928       final HTableDescriptor htd)
929   throws IOException {
930     HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
931     // The above call to create a region will create a WAL file.  Each
932     // WAL file created will also start a running thread to do syncing.  We need
933     // to close out this log else we will have a running thread trying to sync
934     // the file system continuously which is ugly when dfs is taken away at the
935     // end of the test.
936     HRegion.closeHRegion(r);
937     return r;
938   }
939 
940   // TODO: Next test to add is with testing permutations of the RIT or the RS
941   //       killed are hosting ROOT and hbase:meta regions.
942 
943   private void log(String string) {
944     LOG.info("\n\n" + string + " \n\n");
945   }
946 
947   @Test (timeout=180000)
948   public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
949       throws Exception {
950     LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
951     final int NUM_MASTERS = 1;
952     final int NUM_RS = 2;
953 
954     // Start the cluster
955     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
956     Configuration conf = TEST_UTIL.getConfiguration();
957     conf.setInt("hbase.master.info.port", -1);
958     conf.setBoolean("hbase.assignment.usezk", true);
959 
960     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
961     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
962 
963     // Find regionserver carrying meta.
964     List<RegionServerThread> regionServerThreads =
965       cluster.getRegionServerThreads();
966     Region metaRegion = null;
967     HRegionServer metaRegionServer = null;
968     for (RegionServerThread regionServerThread : regionServerThreads) {
969       HRegionServer regionServer = regionServerThread.getRegionServer();
970       metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
971       regionServer.abort("");
972       if (null != metaRegion) {
973         metaRegionServer = regionServer;
974         break;
975       }
976     }
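    // Note that every region server inspected above gets aborted; the loop stops at the one
    // that was carrying hbase:meta and remembers it as metaRegionServer.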
977 
978     TEST_UTIL.shutdownMiniHBaseCluster();
979 
980     // Create a ZKW to use in the test
981     ZooKeeperWatcher zkw =
982       HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
983           metaRegion, metaRegionServer.getServerName());
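    // This forges an OPENED transition znode for hbase:meta still pointing at the aborted
    // region server, so the restarted master sees meta as a region in transition.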
984 
985     LOG.info("Starting cluster for second time");
986     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
987 
988     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
989     while (!master.isInitialized()) {
990       Thread.sleep(100);
991     }
992     // Failover should be completed, now wait for no RIT
993     log("Waiting for no more RIT");
994     ZKAssign.blockUntilNoRIT(zkw);
995 
996     zkw.close();
997     // Stop the cluster
998     TEST_UTIL.shutdownMiniCluster();
999   }
1000 
1001   /**
1002    * This tests that a region in transition (RIT) in OFFLINE state gets re-assigned after a master restart
1003    */
1004   @Test(timeout=240000)
1005   public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1006     final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1007     final int NUM_MASTERS = 1;
1008     final int NUM_RS = 2;
1009 
1010     // Create config to use for this cluster
1011     Configuration conf = HBaseConfiguration.create();
1012     conf.setBoolean("hbase.assignment.usezk", true);
1013 
1014     // Start the cluster
1015     final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1016     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1017     log("Cluster started");
1018 
1019     TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1020     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1021     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1022     HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1023     ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1024     TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1025 
1026     ServerName dstName = null;
1027     for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1028       if (!tmpServer.equals(serverName)) {
1029         dstName = tmpServer;
1030         break;
1031       }
1032     }
1033     // find a different server
1034     assertTrue(dstName != null);
1035     // shutdown HBase cluster
1036     TEST_UTIL.shutdownMiniHBaseCluster();
1037     // create a RIT node in offline state
1038     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1039     ZKAssign.createNodeOffline(zkw, hri, dstName);
1040     Stat stat = new Stat();
1041     byte[] data =
1042         ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1043     assertTrue(data != null);
1044     RegionTransition rt = RegionTransition.parseFrom(data);
1045     assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1046 
1047     LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1048         + " and dst server=" + dstName);
1049 
1050     // start HBase cluster
1051     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1052 
1053     while (true) {
1054       master = TEST_UTIL.getHBaseCluster().getMaster();
1055       if (master != null && master.isInitialized()) {
1056         ServerManager serverManager = master.getServerManager();
1057         if (!serverManager.areDeadServersInProgress()) {
1058           break;
1059         }
1060       }
1061       Thread.sleep(200);
1062     }
1063 
1064     // verify the region is assigned
1065     master = TEST_UTIL.getHBaseCluster().getMaster();
1066     master.getAssignmentManager().waitForAssignment(hri);
1067     regionStates = master.getAssignmentManager().getRegionStates();
1068     RegionState newState = regionStates.getRegionState(hri);
1069     assertTrue(newState.isOpened());
1070   }
1071   
1072  /**
1073    * Simple test of master failover.
1074    * <p>
1075    * Starts with three masters.  Kills a backup master.  Then kills the active
1076    * master.  Ensures the final master becomes active and we can still contact
1077    * the cluster.
1078    * @throws Exception
1079    */
1080   @Test (timeout=240000)
1081   public void testSimpleMasterFailover() throws Exception {
1082 
1083     final int NUM_MASTERS = 3;
1084     final int NUM_RS = 3;
1085 
1086     // Start the cluster
1087     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1088 
1089     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1090     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1091 
1092     // get all the master threads
1093     List<MasterThread> masterThreads = cluster.getMasterThreads();
1094 
1095     // wait for each to come online
1096     for (MasterThread mt : masterThreads) {
1097       assertTrue(mt.isAlive());
1098     }
1099 
1100     // verify only one is the active master and we have right number
1101     int numActive = 0;
1102     int activeIndex = -1;
1103     ServerName activeName = null;
1104     HMaster active = null;
1105     for (int i = 0; i < masterThreads.size(); i++) {
1106       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1107         numActive++;
1108         activeIndex = i;
1109         active = masterThreads.get(activeIndex).getMaster();
1110         activeName = active.getServerName();
1111       }
1112     }
1113     assertEquals(1, numActive);
1114     assertEquals(NUM_MASTERS, masterThreads.size());
1115     LOG.info("Active master " + activeName);
1116 
1117     // Check that ClusterStatus reports the correct active and backup masters
1118     assertNotNull(active);
1119     ClusterStatus status = active.getClusterStatus();
1120     assertTrue(status.getMaster().equals(activeName));
1121     assertEquals(2, status.getBackupMastersSize());
1122     assertEquals(2, status.getBackupMasters().size());
1123 
1124     // attempt to stop one of the inactive masters
1125     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1126     HMaster master = cluster.getMaster(backupIndex);
1127     LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1128     cluster.stopMaster(backupIndex, false);
1129     cluster.waitOnMaster(backupIndex);
1130 
1131     // Verify still one active master and it's the same
1132     for (int i = 0; i < masterThreads.size(); i++) {
1133       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1134         assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
1135         activeIndex = i;
1136         active = masterThreads.get(activeIndex).getMaster();
1137       }
1138     }
1139     assertEquals(1, numActive);
1140     assertEquals(2, masterThreads.size());
1141     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1142     LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " region servers");
1143     assertEquals(3, rsCount);
1144 
1145     // Check that ClusterStatus reports the correct active and backup masters
1146     assertNotNull(active);
1147     status = active.getClusterStatus();
1148     assertTrue(status.getMaster().equals(activeName));
1149     assertEquals(1, status.getBackupMastersSize());
1150     assertEquals(1, status.getBackupMasters().size());
1151 
1152     // kill the active master
1153     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1154     cluster.stopMaster(activeIndex, false);
1155     cluster.waitOnMaster(activeIndex);
1156 
1157     // wait for an active master to show up and be ready
1158     assertTrue(cluster.waitForActiveAndReadyMaster());
1159 
1160     LOG.debug("\n\nVerifying backup master is now active\n");
1161     // should only have one master now
1162     assertEquals(1, masterThreads.size());
1163 
1164     // and it should be active
1165     active = masterThreads.get(0).getMaster();
1166     assertNotNull(active);
1167     status = active.getClusterStatus();
1168     ServerName mastername = status.getMaster();
1169     assertTrue(mastername.equals(active.getServerName()));
1170     assertTrue(active.isActiveMaster());
1171     assertEquals(0, status.getBackupMastersSize());
1172     assertEquals(0, status.getBackupMasters().size());
1173     int rss = status.getServersSize();
1174     LOG.info("Active master " + mastername.getServerName() + " managing " +
1175       rss +  " region servers");
1176     assertEquals(3, rss);
1177 
1178     // Stop the cluster
1179     TEST_UTIL.shutdownMiniCluster();
1180   }
1181 
1182   /**
1183    * Test regions in pending_open/close and failed_open/close states across a master failover
1184    */
1185   @Test (timeout=180000)
1186   @SuppressWarnings("deprecation")
1187   public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1188     final int NUM_MASTERS = 1;
1189     final int NUM_RS = 1;
1190 
1191     // Create config to use for this cluster
1192     Configuration conf = HBaseConfiguration.create();
1193     conf.setBoolean("hbase.assignment.usezk", false);
1194 
1195     // Start the cluster
1196     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1197     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1198     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1199     log("Cluster started");
1200 
1201     // get all the master threads
1202     List<MasterThread> masterThreads = cluster.getMasterThreads();
1203     assertEquals(1, masterThreads.size());
1204 
1205     // only one master thread, let's wait for it to be initialized
1206     assertTrue(cluster.waitForActiveAndReadyMaster());
1207     HMaster master = masterThreads.get(0).getMaster();
1208     assertTrue(master.isActiveMaster());
1209     assertTrue(master.isInitialized());
1210 
1211     // Create a table with a region online
1212     Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1213     onlineTable.close();
1214     // Create a table in META, so it has a region offline
1215     HTableDescriptor offlineTable = new HTableDescriptor(
1216       TableName.valueOf(Bytes.toBytes("offlineTable")));
1217     offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1218 
1219     FileSystem filesystem = FileSystem.get(conf);
1220     Path rootdir = FSUtils.getRootDir(conf);
1221     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1222     fstd.createTableDescriptor(offlineTable);
1223 
1224     HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1225     createRegion(hriOffline, rootdir, conf, offlineTable);
1226     MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1227 
1228     log("Regions in hbase:meta and namespace have been created");
1229 
1230     // at this point we expect at least 3 regions to be assigned out
1231     // (meta and namespace, + 1 online region)
1232     assertTrue(3 <= cluster.countServedRegions());
1233     HRegionInfo hriOnline = null;
1234     try (RegionLocator locator =
1235         TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1236       hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1237     }
1238     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1239     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1240 
1241     // Put the online region in pending_close. It is actually already opened.
1242     // This is to simulate that the region close RPC is not sent out before failover
1243     RegionState oldState = regionStates.getRegionState(hriOnline);
1244     RegionState newState = new RegionState(
1245       hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1246     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1247 
1248     // Put the offline region in pending_open. It is actually not opened yet.
1249     // This is to simulate that the region open RPC is not sent out before failover
1250     oldState = new RegionState(hriOffline, State.OFFLINE);
1251     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1252     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1253     
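    // Mock a region stuck in FAILED_CLOSE: a close was attempted and failed, and the master
    // went down before it could retry.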
1254     HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1255     createRegion(failedClose, rootdir, conf, offlineTable);
1256     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1257     
1258     oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1259     newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1260     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1261     
1262    
1263     HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1264     createRegion(failedOpen, rootdir, conf, offlineTable);
1265     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1266     
1267     // Simulate a region transitioning to failed open when the region server reports the
1268     // transition as FAILED_OPEN
1269     oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1270     newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1271     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1272     
1273     HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1274     createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1275     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1276     
1277     // Simulate a region transitioning to failed open when the master couldn't find a plan for
1278     // the region
1279     oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1280     newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1281     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1282     
1283     
1284 
1285     // Stop the master
1286     log("Aborting master");
1287     cluster.abortMaster(0);
1288     cluster.waitOnMaster(0);
1289     log("Master has aborted");
1290 
1291     // Start up a new master
1292     log("Starting up a new master");
1293     master = cluster.startMaster().getMaster();
1294     log("Waiting for master to be ready");
1295     cluster.waitForActiveAndReadyMaster();
1296     log("Master is ready");
1297 
1298     // Wait until there are no more regions in transition
1299     master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1300 
1301     // Re-fetch the region states since the master has restarted
1302     regionStates = master.getAssignmentManager().getRegionStates();
1303 
1304     // All regions should be online again after failover, whatever transition state they were left in
1305     assertTrue(regionStates.isRegionOnline(hriOffline));
1306     assertTrue(regionStates.isRegionOnline(hriOnline));
1307     assertTrue(regionStates.isRegionOnline(failedClose));
1308     assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1309     assertTrue(regionStates.isRegionOnline(failedOpen));
1310     
1311     log("Done with verification, shutting down cluster");
1312 
1313     // Done, shut down the cluster
1314     TEST_UTIL.shutdownMiniCluster();
1315   }
1316 
1317   /**
1318    * Test handling of hbase:meta being in transition when the master fails over.
1319    */
1320   @Test(timeout = 180000)
1321   public void testMetaInTransitionWhenMasterFailover() throws Exception {
1322     final int NUM_MASTERS = 1;
1323     final int NUM_RS = 1;
1324 
1325     // Start the cluster
1326     Configuration conf = HBaseConfiguration.create();
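         // Use ZK-less region assignment: region transitions are tracked by the master and
         // persisted in hbase:meta rather than in ZooKeeper znodes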
1327     conf.setBoolean("hbase.assignment.usezk", false);
1328     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1329     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1330     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1331     log("Cluster started");
1332 
1333     log("Moving meta off the master");
1334     HMaster activeMaster = cluster.getMaster();
1335     HRegionServer rs = cluster.getRegionServer(0);
1336     ServerName metaServerName = cluster.getLiveRegionServerThreads()
1337       .get(0).getRegionServer().getServerName();
1338     activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1339       Bytes.toBytes(metaServerName.getServerName()));
1340     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1341     assertEquals("Meta should be assigned on expected regionserver",
1342       metaServerName, activeMaster.getMetaTableLocator()
1343         .getMetaRegionLocation(activeMaster.getZooKeeper()));
1344 
1345     // Now kill the master; meta should remain on the RS where we placed it before.
1346     log("Aborting master");
1347     activeMaster.abort("test-kill");
1348     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1349     log("Master has aborted");
1350 
1351     // meta should remain where it was
1352     RegionState metaState =
1353       MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1354     assertEquals("hbase:meta should be online on the RS",
1355       rs.getServerName(), metaState.getServerName());
1356     assertEquals("hbase:meta should be in OPEN state",
1357       State.OPEN, metaState.getState());
1358 
1359     // Start up a new master
1360     log("Starting up a new master");
1361     activeMaster = cluster.startMaster().getMaster();
1362     log("Waiting for master to be ready");
1363     cluster.waitForActiveAndReadyMaster();
1364     log("Master is ready");
1365 
1366     // ensure meta is still deployed on RS
1367     metaState =
1368       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1369     assertEquals("hbase:meta should be online on the RS",
1370       rs.getServerName(), metaState.getServerName());
1371     assertEquals("hbase:meta should be in OPEN state",
1372       State.OPEN, metaState.getState());
1373 
1374     // Mark meta as PENDING_OPEN on the RS in ZooKeeper, close it on the RS, and then kill
1375     // the master. This simulates the open RPC being lost right before the failure: ZooKeeper
1376     // claims the region is being opened, but no region server actually hosts it.
1377     // The new master is expected to detect this and finish assigning meta.
1378     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1379       rs.getServerName(), State.PENDING_OPEN);
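         // Close meta on the RS so its actual state matches the PENDING_OPEN location set above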
1380     Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1381     rs.removeFromOnlineRegions(meta, null);
1382     ((HRegion)meta).close();
1383 
1384     log("Aborting master");
1385     activeMaster.abort("test-kill");
1386     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1387     log("Master has aborted");
1388 
1389     // Start up a new master
1390     log("Starting up a new master");
1391     activeMaster = cluster.startMaster().getMaster();
1392     log("Waiting for master to be ready");
1393     cluster.waitForActiveAndReadyMaster();
1394     log("Master is ready");
1395 
1396     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1397     log("Meta was assigned");
1398 
1399     metaState =
1400       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1401     assertEquals("hbase:meta should be online on the RS",
1402       rs.getServerName(), metaState.getServerName());
1403     assertEquals("hbase:meta should be in OPEN state",
1404       State.OPEN, metaState.getState());
1405 
1406     // Mark meta as PENDING_CLOSE on the RS in ZooKeeper, then kill the master.
1407     // This simulates the close RPC being lost right before the failure: ZooKeeper claims
1408     // the region is being closed while it is still open on the RS. It is closed for real
1409     // only after the master is down, and the new master must detect this and reassign meta.
1410     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1411       rs.getServerName(), State.PENDING_CLOSE);
1412 
1413     log("Aborting master");
1414     activeMaster.abort("test-kill");
1415     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1416     log("Master has aborted");
1417 
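         // Actually close meta on the RS now that no master is running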
1418     rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1419       rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1420 
1421     // Start up a new master
1422     log("Starting up a new master");
1423     activeMaster = cluster.startMaster().getMaster();
1424     log("Waiting for master to be ready");
1425     cluster.waitForActiveAndReadyMaster();
1426     log("Master is ready");
1427 
1428     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1429     log("Meta was assigned");
1430 
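         // Close meta again to set up the final scenario below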
1431     rs.getRSRpcServices().closeRegion(
1432       null,
1433       RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1434         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1435 
1436     // Set a dummy server to check if master reassigns meta on restart
1437     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1438       ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1439 
1440     log("Aborting master");
1441     activeMaster.stop("test-kill");
1442 
1443     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1444     log("Master has aborted");
1445 
1446     // Start up a new master
1447     log("Starting up a new master");
1448     activeMaster = cluster.startMaster().getMaster();
1449     log("Waiting for master to be ready");
1450     cluster.waitForActiveAndReadyMaster();
1451     log("Master is ready");
1452 
1453     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1454     log("Meta was assigned");
1455 
1456     // Done, shut down the cluster
1457     TEST_UTIL.shutdownMiniCluster();
1458   }
1459 }
1460