View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotSame;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.concurrent.atomic.AtomicBoolean;
33  
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.hbase.CellScannable;
36  import org.apache.hadoop.hbase.CellUtil;
37  import org.apache.hadoop.hbase.CoordinatedStateException;
38  import org.apache.hadoop.hbase.CoordinatedStateManager;
39  import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
40  import org.apache.hadoop.hbase.DoNotRetryIOException;
41  import org.apache.hadoop.hbase.HBaseConfiguration;
42  import org.apache.hadoop.hbase.HBaseTestingUtility;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.MetaMockingUtil;
47  import org.apache.hadoop.hbase.RegionException;
48  import org.apache.hadoop.hbase.RegionTransition;
49  import org.apache.hadoop.hbase.Server;
50  import org.apache.hadoop.hbase.ServerLoad;
51  import org.apache.hadoop.hbase.ServerName;
52  import org.apache.hadoop.hbase.TableName;
53  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
54  import org.apache.hadoop.hbase.client.ClusterConnection;
55  import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
56  import org.apache.hadoop.hbase.client.Result;
57  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
58  import org.apache.hadoop.hbase.coordination.OpenRegionCoordination;
59  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
60  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
61  import org.apache.hadoop.hbase.exceptions.DeserializationException;
62  import org.apache.hadoop.hbase.executor.EventType;
63  import org.apache.hadoop.hbase.executor.ExecutorService;
64  import org.apache.hadoop.hbase.executor.ExecutorType;
65  import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
66  import org.apache.hadoop.hbase.master.RegionState.State;
67  import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager;
68  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
69  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
70  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
71  import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
72  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
73  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
74  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest;
75  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetResponse;
76  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
77  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
78  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
79  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
80  import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
81  import org.apache.hadoop.hbase.regionserver.RegionServerAbortedException;
82  import org.apache.hadoop.hbase.testclassification.MediumTests;
83  import org.apache.hadoop.hbase.util.Bytes;
84  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
85  import org.apache.hadoop.hbase.util.Threads;
86  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
87  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
88  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
89  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
90  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
91  import org.apache.zookeeper.KeeperException;
92  import org.apache.zookeeper.KeeperException.NodeExistsException;
93  import org.apache.zookeeper.Watcher;
94  import org.junit.After;
95  import org.junit.AfterClass;
96  import org.junit.Before;
97  import org.junit.BeforeClass;
98  import org.junit.Test;
99  import org.junit.experimental.categories.Category;
100 import org.mockito.Mockito;
101 import org.mockito.internal.util.reflection.Whitebox;
102 import org.mockito.invocation.InvocationOnMock;
103 import org.mockito.stubbing.Answer;
104 
105 import com.google.protobuf.RpcController;
106 import com.google.protobuf.ServiceException;
107 
108 
109 /**
110  * Test {@link AssignmentManager}
111  */
112 @Category(MediumTests.class)
113 public class TestAssignmentManager {
114   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
115   private static final ServerName SERVERNAME_A =
116       ServerName.valueOf("example.org", 1234, 5678);
117   private static final ServerName SERVERNAME_AA =
118       ServerName.valueOf("example.org", 1234, 9999);
119   private static final ServerName SERVERNAME_B =
120       ServerName.valueOf("example.org", 0, 5678);
121   private static final HRegionInfo REGIONINFO =
122     new HRegionInfo(TableName.valueOf("t"),
123       HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW);
124   private static int assignmentCount;
125   private static boolean enabling = false;
126 
127   // Mocked objects or; get redone for each test.
128   private MasterServices server;
129   private ServerManager serverManager;
130   private ZooKeeperWatcher watcher;
131   private CoordinatedStateManager cp;
132   private MetaTableLocator mtl;
133   private LoadBalancer balancer;
134   private HMaster master;
135   private ClusterConnection connection;
136 
137   @BeforeClass
138   public static void beforeClass() throws Exception {
139     HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
140     HTU.startMiniZKCluster();
141   }
142 
143   @AfterClass
144   public static void afterClass() throws IOException {
145     HTU.shutdownMiniZKCluster();
146   }
147 
148   @Before
149   public void before() throws ZooKeeperConnectionException, IOException {
150     // TODO: Make generic versions of what we do below and put up in a mocking
151     // utility class or move up into HBaseTestingUtility.
152 
153     // Mock a Server.  Have it return a legit Configuration and ZooKeeperWatcher.
154     // If abort is called, be sure to fail the test (don't just swallow it
155     // silently as is mockito default).
156     this.server = Mockito.mock(MasterServices.class);
157     Mockito.when(server.getServerName()).thenReturn(ServerName.valueOf("master,1,1"));
158     Mockito.when(server.getConfiguration()).thenReturn(HTU.getConfiguration());
159     this.watcher =
160       new ZooKeeperWatcher(HTU.getConfiguration(), "mockedServer", this.server, true);
161     Mockito.when(server.getZooKeeper()).thenReturn(this.watcher);
162     Mockito.doThrow(new RuntimeException("Aborted")).
163       when(server).abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
164 
165     cp = new ZkCoordinatedStateManager();
166     cp.initialize(this.server);
167     cp.start();
168 
169     mtl = Mockito.mock(MetaTableLocator.class);
170 
171     Mockito.when(server.getCoordinatedStateManager()).thenReturn(cp);
172     Mockito.when(server.getMetaTableLocator()).thenReturn(mtl);
173 
174     // Get a connection w/ mocked up common methods.
175     this.connection =
176       (ClusterConnection)HConnectionTestingUtility.getMockedConnection(HTU.getConfiguration());
177 
178     // Make it so we can get a catalogtracker from servermanager.. .needed
179     // down in guts of server shutdown handler.
180     Mockito.when(server.getConnection()).thenReturn(connection);
181     Mockito.when(connection.isManaged()).thenReturn(true);
182 
183     // Mock a ServerManager.  Say server SERVERNAME_{A,B} are online.  Also
184     // make it so if close or open, we return 'success'.
185     this.serverManager = Mockito.mock(ServerManager.class);
186     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
187     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
188     Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer());
189     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
190     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
191     onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
192     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
193         new ArrayList<ServerName>(onlineServers.keySet()));
194     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
195 
196     List<ServerName> avServers = new ArrayList<ServerName>();
197     avServers.addAll(onlineServers.keySet());
198     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
199     Mockito.when(this.serverManager.createDestinationServersList(new ArrayList<ServerName>())).thenReturn(avServers);
200 
201     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)).
202       thenReturn(true);
203     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
204       thenReturn(true);
205     // Ditto on open.
206     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
207       thenReturn(RegionOpeningState.OPENED);
208     Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
209       thenReturn(RegionOpeningState.OPENED);
210     this.master = Mockito.mock(HMaster.class);
211 
212     Mockito.when(this.master.getServerManager()).thenReturn(serverManager);
213   }
214 
215   @After public void after() throws KeeperException, IOException {
216     if (this.watcher != null) {
217       // Clean up all znodes
218       ZKAssign.deleteAllNodes(this.watcher);
219       this.watcher.close();
220       this.cp.stop();
221     }
222     if (this.connection != null) this.connection.close();
223   }
224 
225   /**
226    * Test a balance going on at same time as a master failover
227    *
228    * @throws IOException
229    * @throws KeeperException
230    * @throws InterruptedException
231    * @throws DeserializationException
232    */
233   @Test(timeout = 60000)
234   public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
235       throws IOException, KeeperException, InterruptedException, ServiceException,
236       DeserializationException, CoordinatedStateException {
237     AssignmentManagerWithExtrasForTesting am =
238       setUpMockedAssignmentManager(this.server, this.serverManager);
239     try {
240       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
241       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
242       while (!am.processRITInvoked) Thread.sleep(1);
243       // As part of the failover cleanup, the balancing region plan is removed.
244       // So a random server will be used to open the region. For testing purpose,
245       // let's assume it is going to open on server b:
246       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
247 
248       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
249 
250       // Now fake the region closing successfully over on the regionserver; the
251       // regionserver will have set the region in CLOSED state. This will
252       // trigger callback into AM. The below zk close call is from the RS close
253       // region handler duplicated here because its down deep in a private
254       // method hard to expose.
255       int versionid =
256         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
257       assertNotSame(versionid, -1);
258       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
259 
260       // Get current versionid else will fail on transition from OFFLINE to
261       // OPENING below
262       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
263       assertNotSame(-1, versionid);
264       // This uglyness below is what the openregionhandler on RS side does.
265       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
266         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
267         EventType.RS_ZK_REGION_OPENING, versionid);
268       assertNotSame(-1, versionid);
269       // Move znode from OPENING to OPENED as RS does on successful open.
270       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
271         SERVERNAME_B, versionid);
272       assertNotSame(-1, versionid);
273       am.gate.set(false);
274       // Block here until our znode is cleared or until this test times out.
275       ZKAssign.blockUntilNoRIT(watcher);
276     } finally {
277       am.getExecutorService().shutdown();
278       am.shutdown();
279     }
280   }
281 
282   @Test(timeout = 60000)
283   public void testBalanceOnMasterFailoverScenarioWithClosedNode()
284       throws IOException, KeeperException, InterruptedException, ServiceException,
285         DeserializationException, CoordinatedStateException {
286     AssignmentManagerWithExtrasForTesting am =
287       setUpMockedAssignmentManager(this.server, this.serverManager);
288     try {
289       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
290       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
291       while (!am.processRITInvoked) Thread.sleep(1);
292       // As part of the failover cleanup, the balancing region plan is removed.
293       // So a random server will be used to open the region. For testing purpose,
294       // let's assume it is going to open on server b:
295       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
296 
297       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
298 
299       // Now fake the region closing successfully over on the regionserver; the
300       // regionserver will have set the region in CLOSED state. This will
301       // trigger callback into AM. The below zk close call is from the RS close
302       // region handler duplicated here because its down deep in a private
303       // method hard to expose.
304       int versionid =
305         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
306       assertNotSame(versionid, -1);
307       am.gate.set(false);
308       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
309 
310       // Get current versionid else will fail on transition from OFFLINE to
311       // OPENING below
312       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
313       assertNotSame(-1, versionid);
314       // This uglyness below is what the openregionhandler on RS side does.
315       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
316           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
317           EventType.RS_ZK_REGION_OPENING, versionid);
318       assertNotSame(-1, versionid);
319       // Move znode from OPENING to OPENED as RS does on successful open.
320       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
321           SERVERNAME_B, versionid);
322       assertNotSame(-1, versionid);
323 
324       // Block here until our znode is cleared or until this test timesout.
325       ZKAssign.blockUntilNoRIT(watcher);
326     } finally {
327       am.getExecutorService().shutdown();
328       am.shutdown();
329     }
330   }
331 
332   @Test(timeout = 60000)
333   public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
334       throws IOException, KeeperException, InterruptedException, ServiceException,
335       DeserializationException, CoordinatedStateException {
336     AssignmentManagerWithExtrasForTesting am =
337       setUpMockedAssignmentManager(this.server, this.serverManager);
338     try {
339       createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO);
340       startFakeFailedOverMasterAssignmentManager(am, this.watcher);
341       while (!am.processRITInvoked) Thread.sleep(1);
342       // As part of the failover cleanup, the balancing region plan is removed.
343       // So a random server will be used to open the region. For testing purpose,
344       // let's assume it is going to open on server b:
345       am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
346 
347       Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
348 
349       // Now fake the region closing successfully over on the regionserver; the
350       // regionserver will have set the region in CLOSED state. This will
351       // trigger callback into AM. The below zk close call is from the RS close
352       // region handler duplicated here because its down deep in a private
353       // method hard to expose.
354       int versionid =
355         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
356       assertNotSame(versionid, -1);
357       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
358 
359       am.gate.set(false);
360       // Get current versionid else will fail on transition from OFFLINE to
361       // OPENING below
362       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
363       assertNotSame(-1, versionid);
364       // This uglyness below is what the openregionhandler on RS side does.
365       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
366           SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
367           EventType.RS_ZK_REGION_OPENING, versionid);
368       assertNotSame(-1, versionid);
369       // Move znode from OPENING to OPENED as RS does on successful open.
370       versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO,
371           SERVERNAME_B, versionid);
372       assertNotSame(-1, versionid);
373       // Block here until our znode is cleared or until this test timesout.
374       ZKAssign.blockUntilNoRIT(watcher);
375     } finally {
376       am.getExecutorService().shutdown();
377       am.shutdown();
378     }
379   }
380 
381   private void createRegionPlanAndBalance(
382       final AssignmentManager am, final ServerName from,
383       final ServerName to, final HRegionInfo hri) throws RegionException {
384     // Call the balance function but fake the region being online first at
385     // servername from.
386     am.regionOnline(hri, from);
387     // Balance region from 'from' to 'to'. It calls unassign setting CLOSING state
388     // up in zk.  Create a plan and balance
389     am.balance(new RegionPlan(hri, from, to));
390   }
391 
392   /**
393    * Tests AssignmentManager balance function.  Runs a balance moving a region
394    * from one server to another mocking regionserver responding over zk.
395    * @throws IOException
396    * @throws KeeperException
397    * @throws DeserializationException
398    */
399   @Test (timeout=180000)
400   public void testBalance() throws IOException, KeeperException, DeserializationException,
401       InterruptedException, CoordinatedStateException {
402     // Create and startup an executor.  This is used by AssignmentManager
403     // handling zk callbacks.
404     ExecutorService executor = startupMasterExecutor("testBalanceExecutor");
405 
406     // We need a mocked catalog tracker.
407     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
408         .getConfiguration());
409     // Create an AM.
410     AssignmentManager am = new AssignmentManager(this.server,
411       this.serverManager, balancer, executor, null, master.getTableLockManager());
412     am.failoverCleanupDone.set(true);
413     try {
414       // Make sure our new AM gets callbacks; once registered, can't unregister.
415       // Thats ok because we make a new zk watcher for each test.
416       this.watcher.registerListenerFirst(am);
417       // Call the balance function but fake the region being online first at
418       // SERVERNAME_A.  Create a balance plan.
419       am.regionOnline(REGIONINFO, SERVERNAME_A);
420       // Balance region from A to B.
421       RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
422       am.balance(plan);
423 
424       RegionStates regionStates = am.getRegionStates();
425       // Must be failed to close since the server is fake
426       assertTrue(regionStates.isRegionInTransition(REGIONINFO)
427         && regionStates.isRegionInState(REGIONINFO, State.FAILED_CLOSE));
428       // Move it back to pending_close
429       regionStates.updateRegionState(REGIONINFO, State.PENDING_CLOSE);
430 
431       // Now fake the region closing successfully over on the regionserver; the
432       // regionserver will have set the region in CLOSED state.  This will
433       // trigger callback into AM. The below zk close call is from the RS close
434       // region handler duplicated here because its down deep in a private
435       // method hard to expose.
436       int versionid =
437         ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1);
438       assertNotSame(versionid, -1);
439       // AM is going to notice above CLOSED and queue up a new assign.  The
440       // assign will go to open the region in the new location set by the
441       // balancer.  The zk node will be OFFLINE waiting for regionserver to
442       // transition it through OPENING, OPENED.  Wait till we see the OFFLINE
443       // zk node before we proceed.
444       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
445 
446       // Get current versionid else will fail on transition from OFFLINE to OPENING below
447       versionid = ZKAssign.getVersion(this.watcher, REGIONINFO);
448       assertNotSame(-1, versionid);
449       // This uglyness below is what the openregionhandler on RS side does.
450       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
451         SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
452         EventType.RS_ZK_REGION_OPENING, versionid);
453       assertNotSame(-1, versionid);
454       // Move znode from OPENING to OPENED as RS does on successful open.
455       versionid =
456         ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid);
457       assertNotSame(-1, versionid);
458       // Wait on the handler removing the OPENED znode.
459       while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1);
460     } finally {
461       executor.shutdown();
462       am.shutdown();
463       // Clean up all znodes
464       ZKAssign.deleteAllNodes(this.watcher);
465     }
466   }
467 
468   /**
469    * Run a simple server shutdown handler.
470    * @throws KeeperException
471    * @throws IOException
472    */
473   @Test (timeout=180000)
474   public void testShutdownHandler()
475       throws KeeperException, IOException, CoordinatedStateException, ServiceException {
476     // Create and startup an executor.  This is used by AssignmentManager
477     // handling zk callbacks.
478     ExecutorService executor = startupMasterExecutor("testShutdownHandler");
479 
480     // Create an AM.
481     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
482         this.server, this.serverManager);
483     try {
484       processServerShutdownHandler(am, false);
485     } finally {
486       executor.shutdown();
487       am.shutdown();
488       // Clean up all znodes
489       ZKAssign.deleteAllNodes(this.watcher);
490     }
491   }
492 
493   /**
494    * Run a simple server shutdown handler after the same server restarts.
495    * @throws KeeperException
496    * @throws IOException
497    */
498   @Test (timeout=180000)
499   public void testShutdownHandlerWithRestartedServer()
500       throws KeeperException, IOException, CoordinatedStateException, ServiceException {
501     // Create and startup an executor.  This is used by AssignmentManager
502     // handling zk callbacks.
503     ExecutorService executor = startupMasterExecutor("testShutdownHandlerWithRestartedServer");
504 
505     // Create an AM.
506     AssignmentManagerWithExtrasForTesting am =
507       setUpMockedAssignmentManager(this.server, this.serverManager);
508     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_A);
509     am.getTableStateManager().setTableState(REGIONINFO.getTable(), Table.State.ENABLED);
510     try {
511       processServerShutdownHandler(am, false, true);
512     } finally {
513       executor.shutdown();
514       am.shutdown();
515       // Clean up all znodes
516       ZKAssign.deleteAllNodes(this.watcher);
517     }
518   }
519 
520   /**
521    * To test closed region handler to remove rit and delete corresponding znode
522    * if region in pending close or closing while processing shutdown of a region
523    * server.(HBASE-5927).
524    *
525    * @throws KeeperException
526    * @throws IOException
527    * @throws ServiceException
528    */
529   @Test (timeout=180000)
530   public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException,
531     CoordinatedStateException, ServiceException {
532     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
533     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
534   }
535 
536 
537   /**
538    * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
539    * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806
540    *
541    * @throws KeeperException
542    * @throws IOException
543    */
544   @Test (timeout=180000)
545   public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
546     // true indicates the region is split but still in RIT
547     testCaseWithSplitRegionPartial(true);
548     // false indicate the region is not split
549     testCaseWithSplitRegionPartial(false);
550   }
551 
552   private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
553       IOException, InterruptedException,
554     CoordinatedStateException, ServiceException {
555     // Create and startup an executor. This is used by AssignmentManager
556     // handling zk callbacks.
557     ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
558     // We need a mocked catalog tracker.
559     ZKAssign.deleteAllNodes(this.watcher);
560 
561     // Create an AM.
562     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
563       this.server, this.serverManager);
564     // adding region to regions and servers maps.
565     am.regionOnline(REGIONINFO, SERVERNAME_A);
566     // adding region in pending close.
567     am.getRegionStates().updateRegionState(
568       REGIONINFO, State.SPLITTING, SERVERNAME_A);
569     am.getTableStateManager().setTableState(REGIONINFO.getTable(),
570       Table.State.ENABLED);
571     RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
572         REGIONINFO.getRegionName(), SERVERNAME_A);
573     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
574     // create znode in M_ZK_REGION_CLOSING state.
575     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
576 
577     try {
578       processServerShutdownHandler(am, regionSplitDone);
579       // check znode deleted or not.
580       // In both cases the znode should be deleted.
581 
582       if (regionSplitDone) {
583         assertFalse("Region state of region in SPLITTING should be removed from rit.",
584             am.getRegionStates().isRegionsInTransition());
585       } else {
586         while (!am.assignInvoked) {
587           Thread.sleep(1);
588         }
589         assertTrue("Assign should be invoked.", am.assignInvoked);
590       }
591     } finally {
592       REGIONINFO.setOffline(false);
593       REGIONINFO.setSplit(false);
594       executor.shutdown();
595       am.shutdown();
596       // Clean up all znodes
597       ZKAssign.deleteAllNodes(this.watcher);
598     }
599   }
600 
601   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
602       IOException, CoordinatedStateException, ServiceException {
603     // Create and startup an executor. This is used by AssignmentManager
604     // handling zk callbacks.
605     ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress");
606     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
607     ZKAssign.deleteAllNodes(this.watcher);
608 
609     // Create an AM.
610     AssignmentManager am = new AssignmentManager(this.server,
611       this.serverManager, balancer, executor, null, master.getTableLockManager());
612     // adding region to regions and servers maps.
613     am.regionOnline(REGIONINFO, SERVERNAME_A);
614     // adding region in pending close.
615     am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
616     if (state == Table.State.DISABLING) {
617       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
618         Table.State.DISABLING);
619     } else {
620       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
621         Table.State.DISABLED);
622     }
623     RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
624         REGIONINFO.getRegionName(), SERVERNAME_A);
625     // RegionTransitionData data = new
626     // RegionTransitionData(EventType.M_ZK_REGION_CLOSING,
627     // REGIONINFO.getRegionName(), SERVERNAME_A);
628     String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
629     // create znode in M_ZK_REGION_CLOSING state.
630     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
631 
632     try {
633       processServerShutdownHandler(am, false);
634       // check znode deleted or not.
635       // In both cases the znode should be deleted.
636       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
637       // check whether in rit or not. In the DISABLING case also the below
638       // assert will be true but the piece of code added for HBASE-5927 will not
639       // do that.
640       if (state == Table.State.DISABLED) {
641         assertFalse("Region state of region in pending close should be removed from rit.",
642             am.getRegionStates().isRegionsInTransition());
643       }
644     } finally {
645       am.setEnabledTable(REGIONINFO.getTable());
646       executor.shutdown();
647       am.shutdown();
648       // Clean up all znodes
649       ZKAssign.deleteAllNodes(this.watcher);
650     }
651   }
652 
653   private void processServerShutdownHandler(AssignmentManager am, boolean splitRegion)
654       throws IOException, ServiceException {
655     processServerShutdownHandler(am, splitRegion, false);
656   }
657 
658   private void processServerShutdownHandler(
659       AssignmentManager am, boolean splitRegion, boolean deadserverRestarted)
660       throws IOException, ServiceException {
661     // Make sure our new AM gets callbacks; once registered, can't unregister.
662     // Thats ok because we make a new zk watcher for each test.
663     this.watcher.registerListenerFirst(am);
664 
665     // Need to set up a fake scan of meta for the servershutdown handler
666     // Make an RS Interface implementation.  Make it so a scanner can go against it.
667     ClientProtos.ClientService.BlockingInterface implementation =
668       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
669     // Get a meta row result that has region up on SERVERNAME_A
670 
671     Result r;
672     if (splitRegion) {
673       r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
674     } else {
675       r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
676     }
677 
678     final ScanResponse.Builder builder = ScanResponse.newBuilder();
679     builder.setMoreResults(true);
680     builder.addCellsPerResult(r.size());
681     final List<CellScannable> cellScannables = new ArrayList<CellScannable>(1);
682     cellScannables.add(r);
683     Mockito.when(implementation.scan(
684       (RpcController)Mockito.any(), (ScanRequest)Mockito.any())).
685       thenAnswer(new Answer<ScanResponse>() {
686           @Override
687           public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
688             PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
689                 .getArguments()[0];
690             if (controller != null) {
691               controller.setCellScanner(CellUtil.createCellScanner(cellScannables));
692             }
693             return builder.build();
694           }
695       });
696 
697     // Get a connection w/ mocked up common methods.
698     ClusterConnection connection =
699       HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(),
700         null, implementation, SERVERNAME_B, REGIONINFO);
701     // These mocks were done up when all connections were managed.  World is different now we
702     // moved to unmanaged connections.  It messes up the intercepts done in these tests.
703     // Just mark connections as marked and then down in MetaTableAccessor, it will go the path
704     // that picks up the above mocked up 'implementation' so 'scans' of meta return the expected
705     // result.  Redo in new realm of unmanaged connections.
706     Mockito.when(connection.isManaged()).thenReturn(true);
707     try {
708       // Make it so we can get a catalogtracker from servermanager.. .needed
709       // down in guts of server shutdown handler.
710       Mockito.when(this.server.getConnection()).thenReturn(connection);
711 
712       // Now make a server shutdown handler instance and invoke process.
713       // Have it that SERVERNAME_A died.
714       DeadServer deadServers = new DeadServer();
715       deadServers.add(SERVERNAME_A);
716       Mockito.when(this.serverManager.isServerReachable(SERVERNAME_B)).thenReturn(true);
717       Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
718       final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
719       onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
720       if (deadserverRestarted) {
721         // Now make the same server (same host name and port) online again with a different
722         // start code.
723         Mockito.when(this.serverManager.isServerOnline(SERVERNAME_AA)).thenReturn(true);
724         Mockito.when(this.serverManager.isServerReachable(SERVERNAME_AA)).thenReturn(true);
725         Mockito.when(
726           this.serverManager.isServerWithSameHostnamePortOnline(SERVERNAME_A)).thenReturn(true);
727         onlineServers.put(SERVERNAME_AA, ServerLoad.EMPTY_SERVERLOAD);
728       }
729       Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
730           new ArrayList<ServerName>(onlineServers.keySet()));
731       Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
732       List<ServerName> avServers = new ArrayList<ServerName>();
733       avServers.addAll(onlineServers.keySet());
734       Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers);
735       // I need a services instance that will return the AM
736       MasterFileSystem fs = Mockito.mock(MasterFileSystem.class);
737       Mockito.doNothing().when(fs).setLogRecoveryMode();
738       Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY);
739       MasterServices services = Mockito.mock(MasterServices.class);
740       Mockito.when(services.getAssignmentManager()).thenReturn(am);
741       Mockito.when(services.getServerManager()).thenReturn(this.serverManager);
742       Mockito.when(services.getZooKeeper()).thenReturn(this.watcher);
743       Mockito.when(services.getMasterFileSystem()).thenReturn(fs);
744       Mockito.when(services.getConnection()).thenReturn(connection);
745       Configuration conf = server.getConfiguration();
746       Mockito.when(services.getConfiguration()).thenReturn(conf);
747       ServerShutdownHandler handler = new ServerShutdownHandler(this.server,
748           services, deadServers, SERVERNAME_A, false);
749       am.failoverCleanupDone.set(true);
750       handler.process();
751       // The region in r will have been assigned.  It'll be up in zk as unassigned.
752     } finally {
753       if (connection != null) connection.close();
754     }
755   }
756 
757   /**
758    * Create and startup executor pools. Start same set as master does (just
759    * run a few less).
760    * @param name Name to give our executor
761    * @return Created executor (be sure to call shutdown when done).
762    */
763   private ExecutorService startupMasterExecutor(final String name) {
764     // TODO: Move up into HBaseTestingUtility?  Generally useful.
765     ExecutorService executor = new ExecutorService(name);
766     executor.startExecutorService(ExecutorType.MASTER_OPEN_REGION, 3);
767     executor.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, 3);
768     executor.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, 3);
769     executor.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, 3);
770     return executor;
771   }
772 
773   @Test (timeout=180000)
774   public void testUnassignWithSplitAtSameTime() throws KeeperException,
775       IOException, CoordinatedStateException {
776     // Region to use in test.
777     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
778     // First amend the servermanager mock so that when we do send close of the
779     // first meta region on SERVERNAME_A, it will return true rather than
780     // default null.
781     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true);
782     // Need a mocked catalog tracker.
783     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server
784         .getConfiguration());
785     // Create an AM.
786     AssignmentManager am = new AssignmentManager(this.server,
787       this.serverManager, balancer, null, null, master.getTableLockManager());
788     try {
789       // First make sure my mock up basically works.  Unassign a region.
790       unassign(am, SERVERNAME_A, hri);
791       // This delete will fail if the previous unassign did wrong thing.
792       ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A);
793       // Now put a SPLITTING region in the way.  I don't have to assert it
794       // go put in place.  This method puts it in place then asserts it still
795       // owns it by moving state from SPLITTING to SPLITTING.
796       int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A);
797       // Now, retry the unassign with the SPLTTING in place.  It should just
798       // complete without fail; a sort of 'silent' recognition that the
799       // region to unassign has been split and no longer exists: TOOD: what if
800       // the split fails and the parent region comes back to life?
801       unassign(am, SERVERNAME_A, hri);
802       // This transition should fail if the znode has been messed with.
803       ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A,
804         EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
805       assertFalse(am.getRegionStates().isRegionInTransition(hri));
806     } finally {
807       am.shutdown();
808     }
809   }
810 
811   /**
812    * Tests the processDeadServersAndRegionsInTransition should not fail with NPE
813    * when it failed to get the children. Let's abort the system in this
814    * situation
815    * @throws ServiceException
816    */
817   @Test(timeout = 60000)
818   public void testProcessDeadServersAndRegionsInTransitionShouldNotFailWithNPE()
819       throws IOException, KeeperException, CoordinatedStateException,
820       InterruptedException, ServiceException {
821     final RecoverableZooKeeper recoverableZk = Mockito
822         .mock(RecoverableZooKeeper.class);
823     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
824       this.server, this.serverManager);
825     Watcher zkw = new ZooKeeperWatcher(HBaseConfiguration.create(), "unittest",
826         null) {
827       @Override
828       public RecoverableZooKeeper getRecoverableZooKeeper() {
829         return recoverableZk;
830       }
831     };
832     ((ZooKeeperWatcher) zkw).registerListener(am);
833     Mockito.doThrow(new InterruptedException()).when(recoverableZk)
834         .getChildren("/hbase/region-in-transition", null);
835     am.setWatcher((ZooKeeperWatcher) zkw);
836     try {
837       am.processDeadServersAndRegionsInTransition(null);
838       fail("Expected to abort");
839     } catch (NullPointerException e) {
840       fail("Should not throw NPE");
841     } catch (RuntimeException e) {
842       assertEquals("Aborted", e.getLocalizedMessage());
843     } finally {
844       am.shutdown();
845     }
846   }
847   /**
848    * TestCase verifies that the regionPlan is updated whenever a region fails to open
849    * and the master tries to process RS_ZK_FAILED_OPEN state.(HBASE-5546).
850    */
851   @Test(timeout = 60000)
852   public void testRegionPlanIsUpdatedWhenRegionFailsToOpen() throws IOException, KeeperException,
853       ServiceException, InterruptedException, CoordinatedStateException {
854     this.server.getConfiguration().setClass(
855       HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MockedLoadBalancer.class,
856       LoadBalancer.class);
857     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
858       this.server, this.serverManager);
859     try {
860       // Boolean variable used for waiting until randomAssignment is called and
861       // new
862       // plan is generated.
863       AtomicBoolean gate = new AtomicBoolean(false);
864       if (balancer instanceof MockedLoadBalancer) {
865         ((MockedLoadBalancer) balancer).setGateVariable(gate);
866       }
867       ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
868       int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
869       ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A,
870           EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
871       String path = ZKAssign.getNodeName(this.watcher, REGIONINFO
872           .getEncodedName());
873       am.getRegionStates().updateRegionState(
874         REGIONINFO, State.OPENING, SERVERNAME_A);
875       // a dummy plan inserted into the regionPlans. This plan is cleared and
876       // new one is formed
877       am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(
878           REGIONINFO, null, SERVERNAME_A));
879       RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
880       List<ServerName> serverList = new ArrayList<ServerName>(2);
881       serverList.add(SERVERNAME_B);
882       Mockito.when(
883           this.serverManager.createDestinationServersList(Arrays.asList(SERVERNAME_A)))
884           .thenReturn(serverList);
885       am.nodeDataChanged(path);
886       // here we are waiting until the random assignment in the load balancer is
887       // called.
888       while (!gate.get()) {
889         Thread.sleep(10);
890       }
891       // new region plan may take some time to get updated after random
892       // assignment is called and
893       // gate is set to true.
894       RegionPlan newRegionPlan = am.regionPlans
895           .get(REGIONINFO.getEncodedName());
896       while (newRegionPlan == null) {
897         Thread.sleep(10);
898         newRegionPlan = am.regionPlans.get(REGIONINFO.getEncodedName());
899       }
900       // the new region plan created may contain the same RS as destination but
901       // it should
902       // be new plan.
903       assertNotSame("Same region plan should not come", regionPlan,
904           newRegionPlan);
905       assertTrue("Destination servers should be different.", !(regionPlan
906           .getDestination().equals(newRegionPlan.getDestination())));
907 
908       Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName());
909     } finally {
910       this.server.getConfiguration().setClass(
911           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, StochasticLoadBalancer.class,
912           LoadBalancer.class);
913       am.getExecutorService().shutdown();
914       am.shutdown();
915     }
916   }
917 
918 
919   /**
920    * Mocked load balancer class used in the testcase to make sure that the testcase waits until
921    * random assignment is called and the gate variable is set to true.
922    */
923   public static class MockedLoadBalancer extends StochasticLoadBalancer {
924     private AtomicBoolean gate;
925 
926     public void setGateVariable(AtomicBoolean gate) {
927       this.gate = gate;
928     }
929 
930     @Override
931     public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
932       ServerName randomServerName = super.randomAssignment(regionInfo, servers);
933       this.gate.set(true);
934       return randomServerName;
935     }
936 
937     @Override
938     public Map<ServerName, List<HRegionInfo>> retainAssignment(
939         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
940       this.gate.set(true);
941       return super.retainAssignment(regions, servers);
942     }
943   }
944 
945   /*
946    * Tests the scenario
947    * - a regionserver (SERVERNAME_A) owns a region (hence the meta would have
948    *   the SERVERNAME_A as the host for the region),
949    * - SERVERNAME_A goes down
950    * - one of the affected regions is assigned to a live regionserver (SERVERNAME_B) but that
951    *   assignment somehow fails. The region ends up in the FAILED_OPEN state on ZK
952    * - [Issue that the patch on HBASE-13330 fixes] when the master is restarted,
953    *   the SSH for SERVERNAME_A rightly thinks that the region is now on transition on
954    *   SERVERNAME_B. But the owner for the region is still SERVERNAME_A in the AM's states.
955    *   The AM thinks that the SSH for SERVERNAME_A will assign the region. The region remains
956    *   unassigned for ever.
957    */
958   @Test(timeout = 60000)
959   public void testAssignmentOfRegionInSSHAndInFailedOpenState() throws IOException,
960   KeeperException, ServiceException, CoordinatedStateException, InterruptedException {
961     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
962         this.server, this.serverManager);
963     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_B);
964     int v = ZKAssign.getVersion(this.watcher, REGIONINFO);
965     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_B,
966         EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v);
967     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true);
968     Mockito.when(this.serverManager.isServerReachable(SERVERNAME_B)).thenReturn(true);
969     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
970     DeadServer deadServers = new DeadServer();
971     deadServers.add(SERVERNAME_A);
972     Mockito.when(this.serverManager.getDeadServers()).thenReturn(deadServers);
973     final Map<ServerName, ServerLoad> onlineServers = new HashMap<ServerName, ServerLoad>();
974     onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD);
975     Mockito.when(this.serverManager.getOnlineServersList()).thenReturn(
976         new ArrayList<ServerName>(onlineServers.keySet()));
977     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers);
978     am.gate.set(false);
979     // join the cluster - that's when the AM is really kicking in after a restart
980     am.joinCluster();
981     while (!am.gate.get()) {
982       Thread.sleep(10);
983     }
984     assertTrue(am.getRegionStates().getRegionState(REGIONINFO).getState()
985         == RegionState.State.PENDING_OPEN);
986     am.shutdown();
987   }
988 
989   @Test(timeout = 600000)
990   public void testAssignmentOfRegionRITWithOffline() throws IOException,
991       KeeperException, ServiceException, CoordinatedStateException, InterruptedException {
992     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
993         this.server, this.serverManager);
994     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_B);
995     am.gate.set(false);
996     // join the cluster - that's when the AM is really kicking in after a restart
997     am.joinCluster();
998     while (!am.gate.get()) {
999       Thread.sleep(10);
1000     }
1001     assertTrue(am.getRegionStates().getRegionState(REGIONINFO).getState()
1002         == RegionState.State.PENDING_OPEN);
1003     am.shutdown();
1004   }
1005   /**
1006    * Test the scenario when the master is in failover and trying to process a
1007    * region which is in Opening state on a dead RS. Master will force offline the
1008    * region and put it in transition. AM relies on SSH to reassign it.
1009    */
1010   @Test(timeout = 60000)
1011   public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException,
1012       KeeperException, ServiceException, CoordinatedStateException, InterruptedException {
1013     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(
1014       this.server, this.serverManager);
1015     ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A);
1016     int version = ZKAssign.getVersion(this.watcher, REGIONINFO);
1017     ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
1018         EventType.RS_ZK_REGION_OPENING, version);
1019     RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING,
1020         REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY);
1021     version = ZKAssign.getVersion(this.watcher, REGIONINFO);
1022     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false);
1023     am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done
1024     am.getRegionStates().createRegionState(REGIONINFO);
1025     am.gate.set(false);
1026 
1027     BaseCoordinatedStateManager cp = new ZkCoordinatedStateManager();
1028     cp.initialize(server);
1029     cp.start();
1030 
1031     OpenRegionCoordination orc = cp.getOpenRegionCoordination();
1032     ZkOpenRegionCoordination.ZkOpenRegionDetails zkOrd =
1033       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
1034     zkOrd.setServerName(server.getServerName());
1035     zkOrd.setVersion(version);
1036 
1037     assertFalse(am.processRegionsInTransition(rt, REGIONINFO, orc, zkOrd));
1038     am.getTableStateManager().setTableState(REGIONINFO.getTable(), Table.State.ENABLED);
1039     processServerShutdownHandler(am, false);
1040     // Waiting for the assignment to get completed.
1041     while (!am.gate.get()) {
1042       Thread.sleep(10);
1043     }
1044     assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO
1045         .getEncodedName()));
1046     am.shutdown();
1047   }
1048 
1049   /**
1050    * Test verifies whether assignment is skipped for regions of tables in DISABLING state during
1051    * clean cluster startup. See HBASE-6281.
1052    *
1053    * @throws KeeperException
1054    * @throws IOException
1055    * @throws Exception
1056    */
1057   @Test(timeout = 60000)
1058   public void testDisablingTableRegionsAssignmentDuringCleanClusterStartup()
1059       throws KeeperException, IOException, Exception {
1060     this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
1061         MockedLoadBalancer.class, LoadBalancer.class);
1062     Mockito.when(this.serverManager.getOnlineServers()).thenReturn(
1063         new HashMap<ServerName, ServerLoad>(0));
1064     List<ServerName> destServers = new ArrayList<ServerName>(1);
1065     destServers.add(SERVERNAME_A);
1066     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1067     // To avoid cast exception in DisableTableHandler process.
1068     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1069 
1070     CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager(
1071       HTU.getConfiguration());
1072     MasterServices server = new HMaster(HTU.getConfiguration(), csm);
1073     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1074         this.serverManager);
1075 
1076     Whitebox.setInternalState(server, "metaTableLocator", Mockito.mock(MetaTableLocator.class));
1077 
1078     // Make it so we can get a catalogtracker from servermanager.. .needed
1079     // down in guts of server shutdown handler.
1080     Whitebox.setInternalState(server, "clusterConnection", am.getConnection());
1081 
1082     AtomicBoolean gate = new AtomicBoolean(false);
1083     if (balancer instanceof MockedLoadBalancer) {
1084       ((MockedLoadBalancer) balancer).setGateVariable(gate);
1085     }
1086     try{
1087       // set table in disabling state.
1088       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1089         Table.State.DISABLING);
1090       am.joinCluster();
1091       // should not call retainAssignment if we get empty regions in assignAllUserRegions.
1092       assertFalse(
1093           "Assign should not be invoked for disabling table regions during clean cluster startup.",
1094           gate.get());
1095       // need to change table state from disabling to disabled.
1096       assertTrue("Table should be disabled.",
1097           am.getTableStateManager().isTableState(REGIONINFO.getTable(),
1098             Table.State.DISABLED));
1099     } finally {
1100       this.server.getConfiguration().setClass(
1101         HConstants.HBASE_MASTER_LOADBALANCER_CLASS, StochasticLoadBalancer.class,
1102         LoadBalancer.class);
1103       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1104         Table.State.ENABLED);
1105       am.shutdown();
1106     }
1107   }
1108 
1109   /**
1110    * Test verifies whether all the enabling table regions assigned only once during master startup.
1111    *
1112    * @throws KeeperException
1113    * @throws IOException
1114    * @throws Exception
1115    */
1116   @Test (timeout=180000)
1117   public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception {
1118     enabling = true;
1119     List<ServerName> destServers = new ArrayList<ServerName>(1);
1120     destServers.add(SERVERNAME_A);
1121     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1122     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
1123     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1124     CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager(
1125       HTU.getConfiguration());
1126     MasterServices server = new HMaster(HTU.getConfiguration(), csm);
1127 
1128     Whitebox.setInternalState(server, "serverManager", this.serverManager);
1129     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1130         this.serverManager);
1131 
1132     Whitebox.setInternalState(server, "metaTableLocator", Mockito.mock(MetaTableLocator.class));
1133 
1134     // Make it so we can get a catalogtracker from servermanager.. .needed
1135     // down in guts of server shutdown handler.
1136     Whitebox.setInternalState(server, "clusterConnection", am.getConnection());
1137 
1138     try {
1139       // set table in enabling state.
1140       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1141         Table.State.ENABLING);
1142       new EnableTableHandler((Server)server, REGIONINFO.getTable(),
1143           am, new NullTableLockManager(), true).prepare()
1144           .process();
1145       assertEquals("Number of assignments should be 1.", 1, assignmentCount);
1146       assertTrue("Table should be enabled.",
1147           am.getTableStateManager().isTableState(REGIONINFO.getTable(),
1148             Table.State.ENABLED));
1149     } finally {
1150       enabling = false;
1151       assignmentCount = 0;
1152       am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1153         Table.State.ENABLED);
1154       am.shutdown();
1155       ZKAssign.deleteAllNodes(this.watcher);
1156     }
1157   }
1158 
1159   /**
1160    * Test verifies whether stale znodes of unknown tables as for the hbase:meta will be removed or
1161    * not.
1162    * @throws KeeperException
1163    * @throws IOException
1164    * @throws Exception
1165    */
1166   @Test (timeout=180000)
1167   public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta()
1168       throws Exception {
1169     List<ServerName> destServers = new ArrayList<ServerName>(1);
1170     destServers.add(SERVERNAME_A);
1171     Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers);
1172     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true);
1173     HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0);
1174     CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager(
1175       HTU.getConfiguration());
1176     MasterServices server = new HMaster(HTU.getConfiguration(), csm);
1177     Whitebox.setInternalState(server, "serverManager", this.serverManager);
1178     AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server,
1179         this.serverManager);
1180 
1181     Whitebox.setInternalState(server, "metaTableLocator", Mockito.mock(MetaTableLocator.class));
1182 
1183     // Make it so we can get a catalogtracker from servermanager.. .needed
1184     // down in guts of server shutdown handler.
1185     Whitebox.setInternalState(server, "clusterConnection", am.getConnection());
1186 
1187     try {
1188       TableName tableName = TableName.valueOf("dummyTable");
1189       // set table in enabling state.
1190       am.getTableStateManager().setTableState(tableName,
1191         Table.State.ENABLING);
1192       am.joinCluster();
1193       assertFalse("Table should not be present in zookeeper.",
1194         am.getTableStateManager().isTablePresent(tableName));
1195     } finally {
1196       am.shutdown();
1197     }
1198   }
1199   /**
1200    * When a region is in transition, if the region server opening the region goes down,
1201    * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign).
1202    * This test is to make sure SSH reassigns it right away.
1203    */
1204   @Test (timeout=180000)
1205   public void testSSHTimesOutOpeningRegionTransition()
1206       throws KeeperException, IOException, CoordinatedStateException, ServiceException {
1207     // Create an AM.
1208     AssignmentManagerWithExtrasForTesting am =
1209       setUpMockedAssignmentManager(this.server, this.serverManager);
1210     // adding region in pending open.
1211     RegionState state = new RegionState(REGIONINFO,
1212       State.OPENING, System.currentTimeMillis(), SERVERNAME_A);
1213     am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B);
1214     am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state);
1215     // adding region plan
1216     am.regionPlans.put(REGIONINFO.getEncodedName(),
1217       new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A));
1218     am.getTableStateManager().setTableState(REGIONINFO.getTable(),
1219       Table.State.ENABLED);
1220 
1221     try {
1222       am.assignInvoked = false;
1223       processServerShutdownHandler(am, false);
1224       assertTrue(am.assignInvoked);
1225     } finally {
1226       am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName());
1227       am.regionPlans.remove(REGIONINFO.getEncodedName());
1228       am.shutdown();
1229     }
1230   }
1231 
1232   /**
1233    * Scenario:<ul>
1234    *  <li> master starts a close, and creates a znode</li>
1235    *  <li> it fails just at this moment, before contacting the RS</li>
1236    *  <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
1237    *    we don't know, and we have an exception.</li>
1238    *  <li> the master must handle this nicely and reassign.
1239    *  </ul>
1240    */
1241   @Test (timeout=180000)
1242   public void testClosingFailureDuringRecovery() throws Exception {
1243 
1244     AssignmentManagerWithExtrasForTesting am =
1245         setUpMockedAssignmentManager(this.server, this.serverManager);
1246     ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
1247     try {
1248       am.getRegionStates().createRegionState(REGIONINFO);
1249 
1250       assertFalse( am.getRegionStates().isRegionsInTransition() );
1251 
1252       am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
1253 
1254       assertTrue( am.getRegionStates().isRegionsInTransition() );
1255     } finally {
1256       am.shutdown();
1257     }
1258   }
1259 
1260   /**
1261    * Creates a new ephemeral node in the SPLITTING state for the specified region.
1262    * Create it ephemeral in case regionserver dies mid-split.
1263    *
1264    * <p>Does not transition nodes from other states.  If a node already exists
1265    * for this region, a {@link NodeExistsException} will be thrown.
1266    *
1267    * @param zkw zk reference
1268    * @param region region to be created as offline
1269    * @param serverName server event originates from
1270    * @return Version of znode created.
1271    * @throws KeeperException
1272    * @throws IOException
1273    */
1274   // Copied from SplitTransaction rather than open the method over there in
1275   // the regionserver package.
1276   private static int createNodeSplitting(final ZooKeeperWatcher zkw,
1277       final HRegionInfo region, final ServerName serverName)
1278   throws KeeperException, IOException {
1279     RegionTransition rt =
1280       RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
1281         region.getRegionName(), serverName);
1282 
1283     String node = ZKAssign.getNodeName(zkw, region.getEncodedName());
1284     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.toByteArray())) {
1285       throw new IOException("Failed create of ephemeral " + node);
1286     }
1287     // Transition node from SPLITTING to SPLITTING and pick up version so we
1288     // can be sure this znode is ours; version is needed deleting.
1289     return transitionNodeSplitting(zkw, region, serverName, -1);
1290   }
1291 
1292   // Copied from SplitTransaction rather than open the method over there in
1293   // the regionserver package.
1294   private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
1295       final HRegionInfo parent,
1296       final ServerName serverName, final int version)
1297   throws KeeperException, IOException {
1298     return ZKAssign.transitionNode(zkw, parent, serverName,
1299       EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
1300   }
1301 
1302   private void unassign(final AssignmentManager am, final ServerName sn,
1303       final HRegionInfo hri) throws RegionException {
1304     // Before I can unassign a region, I need to set it online.
1305     am.regionOnline(hri, sn);
1306     // Unassign region.
1307     am.unassign(hri);
1308   }
1309 
1310   /**
1311    * Create an {@link AssignmentManagerWithExtrasForTesting} that has mocked
1312    * {@link CatalogTracker} etc.
1313    * @param server
1314    * @param manager
1315    * @return An AssignmentManagerWithExtras with mock connections, etc.
1316    * @throws IOException
1317    * @throws KeeperException
1318    */
1319   private AssignmentManagerWithExtrasForTesting setUpMockedAssignmentManager(
1320       final MasterServices server, final ServerManager manager)
1321         throws IOException, KeeperException, ServiceException, CoordinatedStateException {
1322     // Make an RS Interface implementation. Make it so a scanner can go against
1323     // it and a get to return the single region, REGIONINFO, this test is
1324     // messing with. Needed when "new master" joins cluster. AM will try and
1325     // rebuild its list of user regions and it will also get the HRI that goes
1326     // with an encoded name by doing a Get on hbase:meta
1327     ClientProtos.ClientService.BlockingInterface ri =
1328       Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
1329     // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO
1330     Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
1331     final ScanResponse.Builder builder = ScanResponse.newBuilder();
1332     builder.setMoreResults(true);
1333     builder.addCellsPerResult(r.size());
1334     final List<CellScannable> rows = new ArrayList<CellScannable>(1);
1335     rows.add(r);
1336     Answer<ScanResponse> ans = new Answer<ClientProtos.ScanResponse>() {
1337       @Override
1338       public ScanResponse answer(InvocationOnMock invocation) throws Throwable {
1339         PayloadCarryingRpcController controller = (PayloadCarryingRpcController) invocation
1340             .getArguments()[0];
1341         if (controller != null) {
1342           controller.setCellScanner(CellUtil.createCellScanner(rows));
1343         }
1344         return builder.build();
1345       }
1346     };
1347     if (enabling) {
1348       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any()))
1349           .thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans).thenAnswer(ans)
1350           .thenReturn(ScanResponse.newBuilder().setMoreResults(false).build());
1351     } else {
1352       Mockito.when(ri.scan((RpcController) Mockito.any(), (ScanRequest) Mockito.any())).thenAnswer(
1353           ans);
1354     }
1355     // If a get, return the above result too for REGIONINFO
1356     GetResponse.Builder getBuilder = GetResponse.newBuilder();
1357     getBuilder.setResult(ProtobufUtil.toResult(r));
1358     Mockito.when(ri.get((RpcController)Mockito.any(), (GetRequest) Mockito.any())).
1359       thenReturn(getBuilder.build());
1360     // Get a connection w/ mocked up common methods.
1361     ClusterConnection connection = (ClusterConnection)HConnectionTestingUtility.
1362       getMockedConnectionAndDecorate(HTU.getConfiguration(), null,
1363         ri, SERVERNAME_B, REGIONINFO);
1364     Mockito.when(this.server.getConnection()).thenReturn(connection);
1365 
1366     // These mocks were done up when all connections were managed.  World is different now we
1367     // moved to unmanaged connections.  It messes up the intercepts done in these tests.
1368     // Just mark connections as marked and then down in MetaTableAccessor, it will go the path
1369     // that picks up the above mocked up 'implementation' so 'scans' of meta return the expected
1370     // result.  Redo in new realm of unmanaged connections.
1371     Mockito.when(connection.isManaged()).thenReturn(true);
1372     // Make it so we can get the connection from our mocked catalogtracker
1373     // Create and startup an executor. Used by AM handling zk callbacks.
1374     ExecutorService executor = startupMasterExecutor("mockedAMExecutor");
1375     this.balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
1376     AssignmentManagerWithExtrasForTesting am = new AssignmentManagerWithExtrasForTesting(
1377       server, connection, manager, this.balancer, executor, new NullTableLockManager());
1378     return am;
1379   }
1380 
1381   /**
1382    * An {@link AssignmentManager} with some extra facility used testing
1383    */
1384   class AssignmentManagerWithExtrasForTesting extends AssignmentManager {
1385     // Keep a reference so can give it out below in {@link #getExecutorService}
1386     private final ExecutorService es;
1387     boolean processRITInvoked = false;
1388     boolean assignInvoked = false;
1389     AtomicBoolean gate = new AtomicBoolean(true);
1390     private ClusterConnection connection;
1391 
1392     public AssignmentManagerWithExtrasForTesting(
1393         final MasterServices master, ClusterConnection connection, final ServerManager serverManager,
1394         final LoadBalancer balancer,
1395         final ExecutorService service, final TableLockManager tableLockManager)
1396             throws KeeperException, IOException, CoordinatedStateException {
1397       super(master, serverManager, balancer, service, null, tableLockManager);
1398       this.es = service;
1399       this.connection = connection;
1400     }
1401 
1402     @Override
1403     boolean processRegionInTransition(String encodedRegionName,
1404         HRegionInfo regionInfo) throws KeeperException, IOException {
1405       this.processRITInvoked = true;
1406       return super.processRegionInTransition(encodedRegionName, regionInfo);
1407     }
1408 
1409     @Override
1410     public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) {
1411       if (enabling) {
1412         assignmentCount++;
1413         this.regionOnline(region, SERVERNAME_A);
1414       } else {
1415         super.assign(region, setOfflineInZK, forceNewPlan);
1416         this.gate.set(true);
1417       }
1418     }
1419     @Override
1420     public void assign(RegionState state,
1421         boolean setOfflineInZK, final boolean forceNewPlan) {
1422       if (enabling) {
1423         assignmentCount++;
1424       } else {
1425         super.assign(state, setOfflineInZK, forceNewPlan);
1426         this.gate.set(true);
1427       }
1428     }
1429 
1430     @Override
1431     boolean assign(ServerName destination, List<HRegionInfo> regions)
1432         throws InterruptedException {
1433       if (enabling) {
1434         for (HRegionInfo region : regions) {
1435           assignmentCount++;
1436           this.regionOnline(region, SERVERNAME_A);
1437         }
1438         return true;
1439       }
1440       return super.assign(destination, regions);
1441     }
1442 
1443     @Override
1444     public void assign(Map<HRegionInfo, ServerName> regionServerMap)
1445         throws IOException, InterruptedException {
1446       assignInvoked = (regionServerMap != null && regionServerMap.size() > 0);
1447       super.assign(regionServerMap);
1448       this.gate.set(true);
1449     }
1450 
1451     @Override
1452     public void assign(List<HRegionInfo> regions)
1453         throws IOException, InterruptedException {
1454       assignInvoked = (regions != null && regions.size() > 0);
1455       super.assign(regions);
1456       this.gate.set(true);
1457     }
1458 
1459     /** reset the watcher */
1460     void setWatcher(ZooKeeperWatcher watcher) {
1461       this.watcher = watcher;
1462     }
1463 
1464     /**
1465      * @return ExecutorService used by this instance.
1466      */
1467     ExecutorService getExecutorService() {
1468       return this.es;
1469     }
1470 
1471     /*
1472      * Convenient method to retrieve mocked up connection
1473      */
1474     ClusterConnection getConnection() {
1475       return this.connection;
1476     }
1477 
1478     @Override
1479     public void shutdown() {
1480       super.shutdown();
1481       if (this.connection != null)
1482         try {
1483           this.connection.close();
1484         } catch (IOException e) {
1485           fail("Failed to close connection");
1486         }
1487     }
1488   }
1489 
1490   /**
1491    * Call joinCluster on the passed AssignmentManager.  Do it in a thread
1492    * so it runs independent of what all else is going on.  Try to simulate
1493    * an AM running insided a failed over master by clearing all in-memory
1494    * AM state first.
1495   */
1496   private void startFakeFailedOverMasterAssignmentManager(final AssignmentManager am,
1497       final ZooKeeperWatcher watcher) {
1498     // Make sure our new AM gets callbacks; once registered, we can't unregister.
1499     // Thats ok because we make a new zk watcher for each test.
1500     watcher.registerListenerFirst(am);
1501     Thread t = new Thread("RunAmJoinCluster") {
1502       @Override
1503       public void run() {
1504         // Call the joinCluster function as though we were doing a master
1505         // failover at this point. It will stall just before we go to add
1506         // the RIT region to our RIT Map in AM at processRegionsInTransition.
1507         // First clear any inmemory state from AM so it acts like a new master
1508         // coming on line.
1509         am.getRegionStates().regionsInTransition.clear();
1510         am.regionPlans.clear();
1511         try {
1512           am.joinCluster();
1513         } catch (IOException e) {
1514           throw new RuntimeException(e);
1515         } catch (KeeperException e) {
1516           throw new RuntimeException(e);
1517         } catch (InterruptedException e) {
1518           throw new RuntimeException(e);
1519         } catch (CoordinatedStateException e) {
1520           throw new RuntimeException(e);
1521         }
1522       }
1523     };
1524     t.start();
1525     while (!t.isAlive()) Threads.sleep(1);
1526   }
1527 
1528   @Test (timeout=180000)
1529   public void testForceAssignMergingRegion() throws Exception {
1530     // Region to use in test.
1531     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1532     // Need a mocked catalog tracker.
1533     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1534       server.getConfiguration());
1535     // Create an AM.
1536     AssignmentManager am = new AssignmentManager(this.server,
1537       this.serverManager, balancer, null, null, master.getTableLockManager());
1538     RegionStates regionStates = am.getRegionStates();
1539     try {
1540       // First set the state of the region to merging
1541       regionStates.updateRegionState(hri, RegionState.State.MERGING);
1542       // Now, try to assign it with force new plan
1543       am.assign(hri, true, true);
1544       assertEquals("The region should be still in merging state",
1545         RegionState.State.MERGING, regionStates.getRegionState(hri).getState());
1546     } finally {
1547       am.shutdown();
1548     }
1549   }
1550 
1551   /**
1552    * Test assignment related ZK events are ignored by AM if the region is not known
1553    * by AM to be in transition. During normal operation, all assignments are started
1554    * by AM (not considering split/merge), if an event is received but the region
1555    * is not in transition, the event must be a very late one. So it can be ignored.
1556    * During master failover, since AM watches assignment znodes after failover cleanup
1557    * is completed, when an event comes in, AM should already have the region in transition
1558    * if ZK is used during the assignment action (only hbck doesn't use ZK for region
1559    * assignment). So during master failover, we can ignored such events too.
1560    */
1561   @Test (timeout=180000)
1562   public void testAssignmentEventIgnoredIfNotExpected() throws KeeperException, IOException,
1563       CoordinatedStateException {
1564     // Region to use in test.
1565     final HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
1566     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1567       server.getConfiguration());
1568     final AtomicBoolean zkEventProcessed = new AtomicBoolean(false);
1569     // Create an AM.
1570     AssignmentManager am = new AssignmentManager(this.server,
1571       this.serverManager, balancer, null, null, master.getTableLockManager()) {
1572 
1573       @Override
1574       void handleRegion(final RegionTransition rt, OpenRegionCoordination coordination,
1575                         OpenRegionCoordination.OpenRegionDetails ord) {
1576         super.handleRegion(rt, coordination, ord);
1577         if (rt != null && Bytes.equals(hri.getRegionName(),
1578           rt.getRegionName()) && rt.getEventType() == EventType.RS_ZK_REGION_OPENING) {
1579           zkEventProcessed.set(true);
1580         }
1581       }
1582     };
1583     try {
1584       // First make sure the region is not in transition
1585       am.getRegionStates().regionOffline(hri);
1586       zkEventProcessed.set(false); // Reset it before faking zk transition
1587       this.watcher.registerListenerFirst(am);
1588       assertFalse("The region should not be in transition",
1589         am.getRegionStates().isRegionInTransition(hri));
1590       ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A);
1591       // Trigger a transition event
1592       ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A);
1593       long startTime = EnvironmentEdgeManager.currentTime();
1594       while (!zkEventProcessed.get()) {
1595         assertTrue("Timed out in waiting for ZK event to be processed",
1596           EnvironmentEdgeManager.currentTime() - startTime < 30000);
1597         Threads.sleepWithoutInterrupt(100);
1598       }
1599       assertFalse(am.getRegionStates().isRegionInTransition(hri));
1600     } finally {
1601       am.shutdown();
1602     }
1603   }
1604 
1605   /**
1606    * If a table is deleted, we should not be able to balance it anymore.
1607    * Otherwise, the region will be brought back.
1608    * @throws Exception
1609    */
1610   @Test (timeout=180000)
1611   public void testBalanceRegionOfDeletedTable() throws Exception {
1612     AssignmentManager am = new AssignmentManager(this.server, this.serverManager,
1613       balancer, null, null, master.getTableLockManager());
1614     RegionStates regionStates = am.getRegionStates();
1615     HRegionInfo hri = REGIONINFO;
1616     regionStates.createRegionState(hri);
1617     assertFalse(regionStates.isRegionInTransition(hri));
1618     RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B);
1619     // Fake table is deleted
1620     regionStates.tableDeleted(hri.getTable());
1621     am.balance(plan);
1622     assertFalse("The region should not in transition",
1623       regionStates.isRegionInTransition(hri));
1624     am.shutdown();
1625   }
1626 
1627   /**
1628    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
1629    * for openRegion. AM should assign this somewhere else. (HBASE-9721)
1630    */
1631   @SuppressWarnings("unchecked")
1632   @Test (timeout=180000)
1633   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
1634     Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1635       Mockito.anyInt(), (List<ServerName>)Mockito.any()))
1636       .thenThrow(new DoNotRetryIOException());
1637     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100);
1638 
1639     HRegionInfo hri = REGIONINFO;
1640     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1641       server.getConfiguration());
1642     // Create an AM.
1643     AssignmentManager am = new AssignmentManager(this.server,
1644       this.serverManager, balancer, null, null, master.getTableLockManager());
1645     RegionStates regionStates = am.getRegionStates();
1646     try {
1647       am.regionPlans.put(REGIONINFO.getEncodedName(),
1648         new RegionPlan(REGIONINFO, null, SERVERNAME_B));
1649 
1650       // Should fail once, but succeed on the second attempt for the SERVERNAME_A
1651       am.assign(hri, true, false);
1652     } finally {
1653       assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName());
1654       am.shutdown();
1655     }
1656   }
1657 
1658   /**
1659    * Tests close region call on a region server that is aborting
1660    */
1661   @Test (timeout=180000)
1662   public void testCloseRegionOnAbortingRS() throws Exception {
1663     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 2);
1664 
1665     HRegionInfo hri = REGIONINFO;
1666     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1667       server.getConfiguration());
1668     // Create an AM.
1669     AssignmentManager am = new AssignmentManager(this.server,
1670       this.serverManager, balancer, null, null, master.getTableLockManager());
1671     RegionStates regionStates = am.getRegionStates();
1672 
1673     regionStates.createRegionState(hri, State.OPEN, SERVERNAME_B, SERVERNAME_B);
1674 
1675     // mock aborting region server
1676     Mockito.when(this.serverManager.sendRegionClose(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO),
1677       Mockito.anyInt(), (ServerName)Mockito.any(), Mockito.anyBoolean()))
1678       .thenThrow(new RegionServerAbortedException(""));
1679 
1680     // try to unassign the region
1681     am.unassign(hri);
1682 
1683     // assert that the we have FAILED_CLOSE for region state
1684     assertEquals(State.FAILED_CLOSE, regionStates.getRegionState(REGIONINFO).getState());
1685     assertEquals(SERVERNAME_B, regionStates.getRegionState(REGIONINFO).getServerName());
1686 
1687     am.shutdown();
1688   }
1689 
1690   /**
1691    * Tests close region call on a region server that is not in onlineServer list
1692    */
1693   @Test (timeout=180000)
1694   public void testCloseRegionOnServerNotOnline() throws Exception {
1695     this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 2);
1696 
1697     HRegionInfo hri = REGIONINFO;
1698     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(
1699       server.getConfiguration());
1700     // Create an AM.
1701     AssignmentManager am = new AssignmentManager(this.server,
1702       this.serverManager, balancer, null, null, master.getTableLockManager()) {
1703       @Override
1704       protected boolean wasRegionOnDeadServerByMeta(HRegionInfo region, ServerName sn) {
1705         return true;
1706       };
1707     };
1708     RegionStates regionStates = am.getRegionStates();
1709 
1710     regionStates.createRegionState(hri, State.OPEN, SERVERNAME_B, SERVERNAME_B);
1711 
1712     // mock that RS is expired, but not processed
1713     Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B))
1714       .thenReturn(false);
1715 
1716     // try to unassign the region
1717     am.unassign(hri);
1718 
1719     // assert that the we have OFFLINE
1720     assertEquals(State.OFFLINE, regionStates.getRegionState(REGIONINFO).getState());
1721 
1722     // try to assign the region before SSH
1723     am.regionPlans.put(REGIONINFO.getEncodedName(),
1724       new RegionPlan(REGIONINFO, null, SERVERNAME_A));
1725     am.assign(hri, true, false);
1726 
1727     // assert that the we still have OFFLINE
1728     assertEquals(State.OFFLINE, regionStates.getRegionState(REGIONINFO).getState());
1729 
1730     am.shutdown();
1731   }
1732 }