View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotEquals;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNull;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.HashMap;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Set;
35  import java.util.concurrent.atomic.AtomicBoolean;
36  import java.util.concurrent.atomic.AtomicInteger;
37  
38  import com.google.common.collect.Lists;
39  import com.google.common.collect.Maps;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.CoordinatedStateManager;
44  import org.apache.hadoop.hbase.HBaseTestingUtility;
45  import org.apache.hadoop.hbase.HColumnDescriptor;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.HRegionInfo;
48  import org.apache.hadoop.hbase.HTableDescriptor;
49  import org.apache.hadoop.hbase.testclassification.MediumTests;
50  import org.apache.hadoop.hbase.MetaTableAccessor;
51  import org.apache.hadoop.hbase.MiniHBaseCluster;
52  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
53  import org.apache.hadoop.hbase.ServerLoad;
54  import org.apache.hadoop.hbase.ServerName;
55  import org.apache.hadoop.hbase.TableName;
56  import org.apache.hadoop.hbase.UnknownRegionException;
57  import org.apache.hadoop.hbase.Waiter;
58  import org.apache.hadoop.hbase.client.Admin;
59  import org.apache.hadoop.hbase.client.HBaseAdmin;
60  import org.apache.hadoop.hbase.client.HTable;
61  import org.apache.hadoop.hbase.client.Result;
62  import org.apache.hadoop.hbase.client.Table;
63  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
64  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
65  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
66  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
67  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
68  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
69  import org.apache.hadoop.hbase.executor.EventType;
70  import org.apache.hadoop.hbase.master.RegionState.State;
71  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
72  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
73  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
74  import org.apache.hadoop.hbase.regionserver.HRegionServer;
75  import org.apache.hadoop.hbase.util.Bytes;
76  import org.apache.hadoop.hbase.util.ConfigUtil;
77  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
78  import org.apache.hadoop.hbase.util.FSUtils;
79  import org.apache.hadoop.hbase.util.JVMClusterUtil;
80  import org.apache.hadoop.hbase.util.Threads;
81  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
82  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
83  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
84  import org.apache.zookeeper.KeeperException;
85  import org.junit.AfterClass;
86  import org.junit.BeforeClass;
87  import org.junit.Test;
88  import org.junit.experimental.categories.Category;
89  
90  
91  /**
92   * This tests AssignmentManager with a testing cluster.
93   */
94  @Category(MediumTests.class)
95  @SuppressWarnings("deprecation")
96  public class TestAssignmentManagerOnCluster {
97    private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
98    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
99    final static Configuration conf = TEST_UTIL.getConfiguration();
100   private static HBaseAdmin admin;
101 
102   static void setupOnce() throws Exception {
103     // Using the our load balancer to control region plans
104     conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
105       MyLoadBalancer.class, LoadBalancer.class);
106     conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
107       MyRegionObserver.class, RegionObserver.class);
108     // Reduce the maximum attempts to speed up the test
109     conf.setInt("hbase.assignment.maximum.attempts", 3);
110     // Put meta on master to avoid meta server shutdown handling
111     conf.set("hbase.balancer.tablesOnMaster", "hbase:meta");
112     conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
113     conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
114 
115     TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
116     admin = TEST_UTIL.getHBaseAdmin();
117   }
118 
119   @BeforeClass
120   public static void setUpBeforeClass() throws Exception {
121     // Use ZK for region assignment
122     conf.setBoolean("hbase.assignment.usezk", true);
123     setupOnce();
124   }
125 
126   @AfterClass
127   public static void tearDownAfterClass() throws Exception {
128     TEST_UTIL.shutdownMiniCluster();
129   }
130 
131   /**
132    * This tests restarting meta regionserver
133    */
134   @Test (timeout=180000)
135   public void testRestartMetaRegionServer() throws Exception {
136     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
137     boolean stoppedARegionServer = false;
138     try {
139       HMaster master = cluster.getMaster();
140       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
141       ServerName metaServerName = regionStates.getRegionServerOfRegion(
142         HRegionInfo.FIRST_META_REGIONINFO);
143       if (master.getServerName().equals(metaServerName) || metaServerName == null
144           || !metaServerName.equals(cluster.getServerHoldingMeta())) {
145         // Move meta off master
146         metaServerName = cluster.getLiveRegionServerThreads()
147           .get(0).getRegionServer().getServerName();
148         master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
149           Bytes.toBytes(metaServerName.getServerName()));
150         master.assignmentManager.waitUntilNoRegionsInTransition(60000);
151       }
152       RegionState metaState =
153           MetaTableLocator.getMetaRegionState(master.getZooKeeper());
154         assertEquals("Meta should be not in transition",
155             metaState.getState(), RegionState.State.OPEN);
156       assertNotEquals("Meta should be moved off master",
157         metaServerName, master.getServerName());
158       cluster.killRegionServer(metaServerName);
159       stoppedARegionServer = true;
160       cluster.waitForRegionServerToStop(metaServerName, 60000);
161       // Wait for SSH to finish
162       final ServerManager serverManager = master.getServerManager();
163       TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
164         @Override
165         public boolean evaluate() throws Exception {
166           return !serverManager.areDeadServersInProgress();
167         }
168       });
169 
170       // Now, make sure meta is assigned
171       assertTrue("Meta should be assigned",
172         regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
173       // Now, make sure meta is registered in zk
174       metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
175       assertEquals("Meta should be not in transition",
176           metaState.getState(), RegionState.State.OPEN);
177       assertEquals("Meta should be assigned", metaState.getServerName(),
178         regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
179       assertNotEquals("Meta should be assigned on a different server",
180         metaState.getServerName(), metaServerName);
181     } finally {
182       if (stoppedARegionServer) {
183         cluster.startRegionServer();
184       }
185     }
186   }
187 
188   /**
189    * This tests region assignment
190    */
191   @Test (timeout=60000)
192   public void testAssignRegion() throws Exception {
193     String table = "testAssignRegion";
194     try {
195       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
196       desc.addFamily(new HColumnDescriptor(FAMILY));
197       admin.createTable(desc);
198 
199       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
200       HRegionInfo hri = new HRegionInfo(
201         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
202       MetaTableAccessor.addRegionToMeta(meta, hri);
203 
204       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
205       master.assignRegion(hri);
206       AssignmentManager am = master.getAssignmentManager();
207       am.waitForAssignment(hri);
208 
209       RegionStates regionStates = am.getRegionStates();
210       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
211       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
212 
213       // Region is assigned now. Let's assign it again.
214       // Master should not abort, and region should be assigned.
215       RegionState oldState = regionStates.getRegionState(hri);
216       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
217       master.getAssignmentManager().waitForAssignment(hri);
218       RegionState newState = regionStates.getRegionState(hri);
219       assertTrue(newState.isOpened()
220         && newState.getStamp() != oldState.getStamp());
221     } finally {
222       TEST_UTIL.deleteTable(Bytes.toBytes(table));
223     }
224   }
225 
226   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
227   @Test (timeout=60000)
228   public void testAssignRegionBySSH() throws Exception {
229     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
230       return;
231     }
232     String table = "testAssignRegionBySSH";
233     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
234     try {
235       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
236       desc.addFamily(new HColumnDescriptor(FAMILY));
237       admin.createTable(desc);
238 
239       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
240       HRegionInfo hri = new HRegionInfo(
241         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
242       MetaTableAccessor.addRegionToMeta(meta, hri);
243       // Add some dummy server for the region entry
244       MetaTableAccessor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getConnection(), hri,
245         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0, -1);
246       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
247       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
248       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
249       // Choose a server other than meta to kill
250       ServerName controlledServer = rs.getServerName();
251       master.enableSSH(false);
252       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
253       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
254       AssignmentManager am = master.getAssignmentManager();
255 
256       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
257       // but not in transition and the server is the dead 'controlledServer'
258       regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null);
259       am.assign(hri, true, true);
260       // Region should remain OFFLINE and go to transition
261       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
262       assertTrue (regionStates.isRegionInTransition(hri));
263 
264       master.enableSSH(true);
265       am.waitForAssignment(hri);
266       assertTrue (regionStates.getRegionState(hri).isOpened());
267       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
268       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
269     } finally {
270       if (master != null) {
271         master.enableSSH(true);
272       }
273       TEST_UTIL.deleteTable(Bytes.toBytes(table));
274       TEST_UTIL.getHBaseCluster().startRegionServer();
275     }
276   }
277 
278   /**
279    * This tests region assignment on a simulated restarted server
280    */
281   @Test (timeout=120000)
282   public void testAssignRegionOnRestartedServer() throws Exception {
283     String table = "testAssignRegionOnRestartedServer";
284     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
285     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
286     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
287 
288     ServerName deadServer = null;
289     HMaster master = null;
290     try {
291       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
292       desc.addFamily(new HColumnDescriptor(FAMILY));
293       admin.createTable(desc);
294 
295       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
296       final HRegionInfo hri = new HRegionInfo(
297         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
298       MetaTableAccessor.addRegionToMeta(meta, hri);
299 
300       master = TEST_UTIL.getHBaseCluster().getMaster();
301       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
302       assertFalse("There should be some servers online", onlineServers.isEmpty());
303 
304       // Use the first server as the destination server
305       ServerName destServer = onlineServers.iterator().next();
306 
307       // Created faked dead server
308       deadServer = ServerName.valueOf(destServer.getHostname(),
309           destServer.getPort(), destServer.getStartcode() - 100L);
310       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
311 
312       final AssignmentManager am = master.getAssignmentManager();
313       RegionPlan plan = new RegionPlan(hri, null, deadServer);
314       am.addPlan(hri.getEncodedName(), plan);
315       master.assignRegion(hri);
316 
317       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
318         destServer, EventType.M_ZK_REGION_OFFLINE,
319         EventType.RS_ZK_REGION_OPENING, 0);
320       assertEquals("TansitionNode should fail", -1, version);
321 
322       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
323         @Override
324         public boolean evaluate() throws Exception {
325           return ! am.getRegionStates().isRegionInTransition(hri);
326         }
327       });
328 
329     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
330     } finally {
331       if (deadServer != null) {
332         master.serverManager.expireServer(deadServer);
333       }
334 
335       TEST_UTIL.deleteTable(Bytes.toBytes(table));
336 
337       // reset the value for other tests
338       TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3);
339       ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
340       TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName);
341       TEST_UTIL.getMiniHBaseCluster().startMaster();
342       // Wait till master is active and is initialized
343       while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null ||
344           !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
345         Threads.sleep(1);
346       }
347     }
348   }
349 
350   /**
351    * This tests offlining a region
352    */
353   @Test (timeout=60000)
354   public void testOfflineRegion() throws Exception {
355     TableName table =
356         TableName.valueOf("testOfflineRegion");
357     try {
358       HRegionInfo hri = createTableAndGetOneRegion(table);
359 
360       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
361         getMaster().getAssignmentManager().getRegionStates();
362       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
363       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
364       admin.offline(hri.getRegionName());
365 
366       long timeoutTime = System.currentTimeMillis() + 800;
367       while (true) {
368         if (regionStates.getRegionByStateOfTable(table)
369             .get(RegionState.State.OFFLINE).contains(hri))
370           break;
371         long now = System.currentTimeMillis();
372         if (now > timeoutTime) {
373           fail("Failed to offline the region in time");
374           break;
375         }
376         Thread.sleep(10);
377       }
378       RegionState regionState = regionStates.getRegionState(hri);
379       assertTrue(regionState.isOffline());
380     } finally {
381       TEST_UTIL.deleteTable(table);
382     }
383   }
384 
385   /**
386    * This tests moving a region
387    */
388   @Test (timeout=50000)
389   public void testMoveRegion() throws Exception {
390     TableName table =
391         TableName.valueOf("testMoveRegion");
392     try {
393       HRegionInfo hri = createTableAndGetOneRegion(table);
394 
395       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
396       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
397       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
398       ServerManager serverManager = master.getServerManager();
399       ServerName destServerName = null;
400       List<JVMClusterUtil.RegionServerThread> regionServers =
401         TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads();
402       for (JVMClusterUtil.RegionServerThread regionServer: regionServers) {
403         HRegionServer destServer = regionServer.getRegionServer();
404         destServerName = destServer.getServerName();
405         if (!destServerName.equals(serverName)
406             && serverManager.isServerOnline(destServerName)) {
407           break;
408         }
409       }
410       assertTrue(destServerName != null
411         && !destServerName.equals(serverName));
412       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
413         Bytes.toBytes(destServerName.getServerName()));
414 
415       long timeoutTime = System.currentTimeMillis() + 30000;
416       while (true) {
417         ServerName sn = regionStates.getRegionServerOfRegion(hri);
418         if (sn != null && sn.equals(destServerName)) {
419           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
420           break;
421         }
422         long now = System.currentTimeMillis();
423         if (now > timeoutTime) {
424           fail("Failed to move the region in time: "
425             + regionStates.getRegionState(hri));
426         }
427         regionStates.waitForUpdate(50);
428       }
429 
430     } finally {
431       TEST_UTIL.deleteTable(table);
432     }
433   }
434 
435   /**
436    * If a table is deleted, we should not be able to move it anymore.
437    * Otherwise, the region will be brought back.
438    * @throws Exception
439    */
440   @Test (timeout=50000)
441   public void testMoveRegionOfDeletedTable() throws Exception {
442     TableName table =
443         TableName.valueOf("testMoveRegionOfDeletedTable");
444     Admin admin = TEST_UTIL.getHBaseAdmin();
445     try {
446       HRegionInfo hri = createTableAndGetOneRegion(table);
447 
448       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
449       AssignmentManager am = master.getAssignmentManager();
450       RegionStates regionStates = am.getRegionStates();
451       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
452       ServerName destServerName = null;
453       for (int i = 0; i < 3; i++) {
454         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
455         if (!destServer.getServerName().equals(serverName)) {
456           destServerName = destServer.getServerName();
457           break;
458         }
459       }
460       assertTrue(destServerName != null
461         && !destServerName.equals(serverName));
462 
463       TEST_UTIL.deleteTable(table);
464 
465       try {
466         admin.move(hri.getEncodedNameAsBytes(),
467           Bytes.toBytes(destServerName.getServerName()));
468         fail("We should not find the region");
469       } catch (IOException ioe) {
470         assertTrue(ioe instanceof UnknownRegionException);
471       }
472 
473       am.balance(new RegionPlan(hri, serverName, destServerName));
474       assertFalse("The region should not be in transition",
475         regionStates.isRegionInTransition(hri));
476     } finally {
477       if (admin.tableExists(table)) {
478         TEST_UTIL.deleteTable(table);
479       }
480     }
481   }
482 
483   HRegionInfo createTableAndGetOneRegion(
484       final TableName tableName) throws IOException, InterruptedException {
485     HTableDescriptor desc = new HTableDescriptor(tableName);
486     desc.addFamily(new HColumnDescriptor(FAMILY));
487     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
488 
489     // wait till the table is assigned
490     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
491     long timeoutTime = System.currentTimeMillis() + 1000;
492     while (true) {
493       List<HRegionInfo> regions = master.getAssignmentManager().
494         getRegionStates().getRegionsOfTable(tableName);
495       if (regions.size() > 3) {
496         return regions.get(2);
497       }
498       long now = System.currentTimeMillis();
499       if (now > timeoutTime) {
500         fail("Could not find an online region");
501       }
502       Thread.sleep(10);
503     }
504   }
505 
506   /**
507    * This test should not be flaky. If it is flaky, it means something
508    * wrong with AssignmentManager which should be reported and fixed
509    *
510    * This tests forcefully assign a region while it's closing and re-assigned.
511    */
512   @Test (timeout=60000)
513   public void testForceAssignWhileClosing() throws Exception {
514     String table = "testForceAssignWhileClosing";
515     try {
516       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
517       desc.addFamily(new HColumnDescriptor(FAMILY));
518       admin.createTable(desc);
519 
520       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
521       HRegionInfo hri = new HRegionInfo(
522         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
523       MetaTableAccessor.addRegionToMeta(meta, hri);
524 
525       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
526       master.assignRegion(hri);
527       AssignmentManager am = master.getAssignmentManager();
528       assertTrue(am.waitForAssignment(hri));
529 
530       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
531       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
532       MyRegionObserver.preCloseEnabled.set(true);
533       am.unassign(hri);
534       RegionState state = am.getRegionStates().getRegionState(hri);
535       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
536 
537       MyRegionObserver.preCloseEnabled.set(false);
538       am.unassign(hri, true);
539 
540       // region is closing now, will be re-assigned automatically.
541       // now, let's forcefully assign it again. it should be
542       // assigned properly and no double-assignment
543       am.assign(hri, true, true);
544 
545       // let's check if it's assigned after it's out of transition
546       am.waitOnRegionToClearRegionsInTransition(hri);
547       assertTrue(am.waitForAssignment(hri));
548 
549       ServerName serverName = master.getAssignmentManager().
550         getRegionStates().getRegionServerOfRegion(hri);
551       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
552     } finally {
553       MyRegionObserver.preCloseEnabled.set(false);
554       TEST_UTIL.deleteTable(Bytes.toBytes(table));
555     }
556   }
557 
558   /**
559    * This tests region close failed
560    */
561   @Test (timeout=60000)
562   public void testCloseFailed() throws Exception {
563     String table = "testCloseFailed";
564     try {
565       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
566       desc.addFamily(new HColumnDescriptor(FAMILY));
567       admin.createTable(desc);
568 
569       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
570       HRegionInfo hri = new HRegionInfo(
571         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
572       MetaTableAccessor.addRegionToMeta(meta, hri);
573 
574       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
575       master.assignRegion(hri);
576       AssignmentManager am = master.getAssignmentManager();
577       assertTrue(am.waitForAssignment(hri));
578       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
579       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
580 
581       MyRegionObserver.preCloseEnabled.set(true);
582       am.unassign(hri);
583       RegionState state = am.getRegionStates().getRegionState(hri);
584       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
585 
586       MyRegionObserver.preCloseEnabled.set(false);
587       am.unassign(hri, true);
588 
589       // region may still be assigned now since it's closing,
590       // let's check if it's assigned after it's out of transition
591       am.waitOnRegionToClearRegionsInTransition(hri);
592 
593       // region should be closed and re-assigned
594       assertTrue(am.waitForAssignment(hri));
595       ServerName serverName = master.getAssignmentManager().
596         getRegionStates().getRegionServerOfRegion(hri);
597       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
598     } finally {
599       MyRegionObserver.preCloseEnabled.set(false);
600       TEST_UTIL.deleteTable(Bytes.toBytes(table));
601     }
602   }
603 
604   /**
605    * This tests region open failed
606    */
607   @Test (timeout=60000)
608   public void testOpenFailed() throws Exception {
609     String table = "testOpenFailed";
610     try {
611       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
612       desc.addFamily(new HColumnDescriptor(FAMILY));
613       admin.createTable(desc);
614 
615       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
616       HRegionInfo hri = new HRegionInfo(
617         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
618       MetaTableAccessor.addRegionToMeta(meta, hri);
619 
620       MyLoadBalancer.controledRegion = hri;
621 
622       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
623       master.assignRegion(hri);
624       AssignmentManager am = master.getAssignmentManager();
625       assertFalse(am.waitForAssignment(hri));
626 
627       RegionState state = am.getRegionStates().getRegionState(hri);
628       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
629       // Failed to open since no plan, so it's on no server
630       assertNull(state.getServerName());
631 
632       MyLoadBalancer.controledRegion = null;
633       master.assignRegion(hri);
634       assertTrue(am.waitForAssignment(hri));
635 
636       ServerName serverName = master.getAssignmentManager().
637         getRegionStates().getRegionServerOfRegion(hri);
638       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
639     } finally {
640       MyLoadBalancer.controledRegion = null;
641       TEST_UTIL.deleteTable(Bytes.toBytes(table));
642     }
643   }
644 
645   /**
646    * This tests round-robin assignment failed due to no bulkplan
647    */
648   @Test (timeout=60000)
649   public void testRoundRobinAssignmentFailed() throws Exception {
650     TableName tableName = TableName.valueOf("testRoundRobinAssignmentFailed");
651     try {
652       HTableDescriptor desc = new HTableDescriptor(tableName);
653       desc.addFamily(new HColumnDescriptor(FAMILY));
654       admin.createTable(desc);
655 
656       Table meta = admin.getConnection().getTable(TableName.META_TABLE_NAME);
657       HRegionInfo hri = new HRegionInfo(
658         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
659       MetaTableAccessor.addRegionToMeta(meta, hri);
660 
661       MyLoadBalancer.controledRegion = hri;
662 
663       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
664       AssignmentManager am = master.getAssignmentManager();
665       // round-robin assignment but balancer cannot find a plan
666       // assignment should fail
667       am.assign(Arrays.asList(hri));
668 
669       // if bulk assignment cannot update region state to online
670       // or failed_open this waits until timeout
671       assertFalse(am.waitForAssignment(hri));
672       RegionState state = am.getRegionStates().getRegionState(hri);
673       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
674       // Failed to open since no plan, so it's on no server
675       assertNull(state.getServerName());
676 
677       // try again with valid plan
678       MyLoadBalancer.controledRegion = null;
679       am.assign(Arrays.asList(hri));
680       assertTrue(am.waitForAssignment(hri));
681 
682       ServerName serverName = master.getAssignmentManager().
683         getRegionStates().getRegionServerOfRegion(hri);
684       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
685     } finally {
686       MyLoadBalancer.controledRegion = null;
687       TEST_UTIL.deleteTable(tableName);
688     }
689   }
690 
691   /**
692    * This tests retain assignment failed due to no bulkplan
693    */
694   @Test (timeout=60000)
695   public void testRetainAssignmentFailed() throws Exception {
696     TableName tableName = TableName.valueOf("testRetainAssignmentFailed");
697     try {
698       HTableDescriptor desc = new HTableDescriptor(tableName);
699       desc.addFamily(new HColumnDescriptor(FAMILY));
700       admin.createTable(desc);
701 
702       Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
703       HRegionInfo hri = new HRegionInfo(
704         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
705       MetaTableAccessor.addRegionToMeta(meta, hri);
706 
707       MyLoadBalancer.controledRegion = hri;
708 
709       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
710       AssignmentManager am = master.getAssignmentManager();
711 
712       Map<HRegionInfo, ServerName> regions = new HashMap<HRegionInfo, ServerName>();
713       ServerName dest = TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName();
714       regions.put(hri, dest);
715       // retainAssignment but balancer cannot find a plan
716       // assignment should fail
717       am.assign(regions);
718 
719       // if retain assignment cannot update region state to online
720       // or failed_open this waits until timeout
721       assertFalse(am.waitForAssignment(hri));
722       RegionState state = am.getRegionStates().getRegionState(hri);
723       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
724       // Failed to open since no plan, so it's on no server
725       assertNull(state.getServerName());
726 
727       // try retainAssigment again with valid plan
728       MyLoadBalancer.controledRegion = null;
729       am.assign(regions);
730       assertTrue(am.waitForAssignment(hri));
731 
732       ServerName serverName = master.getAssignmentManager().
733         getRegionStates().getRegionServerOfRegion(hri);
734       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
735 
736       // it retains on same server as specified
737       assertEquals(serverName, dest);
738     } finally {
739       MyLoadBalancer.controledRegion = null;
740       TEST_UTIL.deleteTable(tableName);
741     }
742   }
743 
744   /**
745    * This tests region open failure which is not recoverable
746    */
747   @Test (timeout=60000)
748   public void testOpenFailedUnrecoverable() throws Exception {
749     TableName table =
750         TableName.valueOf("testOpenFailedUnrecoverable");
751     try {
752       HTableDescriptor desc = new HTableDescriptor(table);
753       desc.addFamily(new HColumnDescriptor(FAMILY));
754       admin.createTable(desc);
755 
756       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
757       HRegionInfo hri = new HRegionInfo(
758         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
759       MetaTableAccessor.addRegionToMeta(meta, hri);
760 
761       FileSystem fs = FileSystem.get(conf);
762       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
763       Path regionDir = new Path(tableDir, hri.getEncodedName());
764       // create a file named the same as the region dir to
765       // mess up with region opening
766       fs.create(regionDir, true);
767 
768       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
769       master.assignRegion(hri);
770       AssignmentManager am = master.getAssignmentManager();
771       assertFalse(am.waitForAssignment(hri));
772 
773       RegionState state = am.getRegionStates().getRegionState(hri);
774       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
775       // Failed to open due to file system issue. Region state should
776       // carry the opening region server so that we can force close it
777       // later on before opening it again. See HBASE-9092.
778       assertNotNull(state.getServerName());
779 
780       // remove the blocking file, so that region can be opened
781       fs.delete(regionDir, true);
782       master.assignRegion(hri);
783       assertTrue(am.waitForAssignment(hri));
784 
785       ServerName serverName = master.getAssignmentManager().
786         getRegionStates().getRegionServerOfRegion(hri);
787       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
788     } finally {
789       TEST_UTIL.deleteTable(table);
790     }
791   }
792 
793   @Test (timeout=60000)
794   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
795     final TableName table =
796         TableName.valueOf
797             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
798     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
799     HRegionInfo hri = null;
800     ServerName serverName = null;
801     try {
802       hri = createTableAndGetOneRegion(table);
803       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
804       ServerName destServerName = null;
805       HRegionServer destServer = null;
806       for (int i = 0; i < 3; i++) {
807         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
808         if (!destServer.getServerName().equals(serverName)) {
809           destServerName = destServer.getServerName();
810           break;
811         }
812       }
813       am.regionOffline(hri);
814       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
815       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
816       if (ConfigUtil.useZKForAssignment(conf)) {
817         ZKAssign.createNodeOffline(zkw, hri, destServerName);
818         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
819 
820         // Wait till the event is processed and the region is in transition
821         long timeoutTime = System.currentTimeMillis() + 20000;
822         while (!am.getRegionStates().isRegionInTransition(hri)) {
823           assertTrue("Failed to process ZK opening event in time",
824             System.currentTimeMillis() < timeoutTime);
825           Thread.sleep(100);
826         }
827       }
828 
829       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLING);
830       List<HRegionInfo> toAssignRegions = am.processServerShutdown(destServerName);
831       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
832       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
833           .getRegionState(hri).isOffline());
834     } finally {
835       if (hri != null && serverName != null) {
836         am.regionOnline(hri, serverName);
837       }
838       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLED);
839       TEST_UTIL.deleteTable(table);
840     }
841   }
842 
843   /**
844    * This tests region close hanging
845    */
846   @Test (timeout=60000)
847   public void testCloseHang() throws Exception {
848     String table = "testCloseHang";
849     try {
850       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
851       desc.addFamily(new HColumnDescriptor(FAMILY));
852       admin.createTable(desc);
853 
854       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
855       HRegionInfo hri = new HRegionInfo(
856         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
857       MetaTableAccessor.addRegionToMeta(meta, hri);
858 
859       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
860       master.assignRegion(hri);
861       AssignmentManager am = master.getAssignmentManager();
862       assertTrue(am.waitForAssignment(hri));
863       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
864       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
865 
866       MyRegionObserver.postCloseEnabled.set(true);
867       am.unassign(hri);
868       // Now region should pending_close or closing
869       // Unassign it again forcefully so that we can trigger already
870       // in transition exception. This test is to make sure this scenario
871       // is handled properly.
872       am.server.getConfiguration().setLong(
873         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
874       am.unassign(hri, true);
875       RegionState state = am.getRegionStates().getRegionState(hri);
876       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
877 
878       // Let region closing move ahead. The region should be closed
879       // properly and re-assigned automatically
880       MyRegionObserver.postCloseEnabled.set(false);
881 
882       // region may still be assigned now since it's closing,
883       // let's check if it's assigned after it's out of transition
884       am.waitOnRegionToClearRegionsInTransition(hri);
885 
886       // region should be closed and re-assigned
887       assertTrue(am.waitForAssignment(hri));
888       ServerName serverName = master.getAssignmentManager().
889         getRegionStates().getRegionServerOfRegion(hri);
890       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
891     } finally {
892       MyRegionObserver.postCloseEnabled.set(false);
893       TEST_UTIL.deleteTable(Bytes.toBytes(table));
894     }
895   }
896 
897   /**
898    * This tests region close racing with open
899    */
900   @Test (timeout=60000)
901   public void testOpenCloseRacing() throws Exception {
902     String table = "testOpenCloseRacing";
903     try {
904       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
905       desc.addFamily(new HColumnDescriptor(FAMILY));
906       admin.createTable(desc);
907 
908       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
909       HRegionInfo hri = new HRegionInfo(
910         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
911       MetaTableAccessor.addRegionToMeta(meta, hri);
912       meta.close();
913 
914       MyRegionObserver.postOpenEnabled.set(true);
915       MyRegionObserver.postOpenCalled = false;
916       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
917       // Region will be opened, but it won't complete
918       master.assignRegion(hri);
919       long end = EnvironmentEdgeManager.currentTime() + 20000;
920       // Wait till postOpen is called
921       while (!MyRegionObserver.postOpenCalled ) {
922         assertFalse("Timed out waiting for postOpen to be called",
923           EnvironmentEdgeManager.currentTime() > end);
924         Thread.sleep(300);
925       }
926 
927       AssignmentManager am = master.getAssignmentManager();
928       // Now let's unassign it, it should do nothing
929       am.unassign(hri);
930       RegionState state = am.getRegionStates().getRegionState(hri);
931       ServerName oldServerName = state.getServerName();
932       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
933 
934       // Now the region is stuck in opening
935       // Let's forcefully re-assign it to trigger closing/opening
936       // racing. This test is to make sure this scenario
937       // is handled properly.
938       ServerName destServerName = null;
939       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
940       for (int i = 0; i < numRS; i++) {
941         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
942         if (!destServer.getServerName().equals(oldServerName)) {
943           destServerName = destServer.getServerName();
944           break;
945         }
946       }
947       assertNotNull(destServerName);
948       assertFalse("Region should be assigned on a new region server",
949         oldServerName.equals(destServerName));
950       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
951       regions.add(hri);
952       am.assign(destServerName, regions);
953 
954       // let region open continue
955       MyRegionObserver.postOpenEnabled.set(false);
956 
957       // let's check if it's assigned after it's out of transition
958       am.waitOnRegionToClearRegionsInTransition(hri);
959       assertTrue(am.waitForAssignment(hri));
960 
961       ServerName serverName = master.getAssignmentManager().
962         getRegionStates().getRegionServerOfRegion(hri);
963       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
964     } finally {
965       MyRegionObserver.postOpenEnabled.set(false);
966       TEST_UTIL.deleteTable(Bytes.toBytes(table));
967     }
968   }
969 
970   /**
971    * Test force unassign/assign a region hosted on a dead server
972    */
973   @Test (timeout=60000)
974   public void testAssignRacingWithSSH() throws Exception {
975     String table = "testAssignRacingWithSSH";
976     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
977     MyMaster master = null;
978     try {
979       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
980       desc.addFamily(new HColumnDescriptor(FAMILY));
981       admin.createTable(desc);
982 
983       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
984       HRegionInfo hri = new HRegionInfo(
985         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
986       MetaTableAccessor.addRegionToMeta(meta, hri);
987 
988       // Assign the region
989       master = (MyMaster)cluster.getMaster();
990       master.assignRegion(hri);
991 
992       // Hold SSH before killing the hosting server
993       master.enableSSH(false);
994 
995       AssignmentManager am = master.getAssignmentManager();
996       RegionStates regionStates = am.getRegionStates();
997       ServerName metaServer = regionStates.getRegionServerOfRegion(
998         HRegionInfo.FIRST_META_REGIONINFO);
999       while (true) {
1000         assertTrue(am.waitForAssignment(hri));
1001         RegionState state = regionStates.getRegionState(hri);
1002         ServerName oldServerName = state.getServerName();
1003         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1004           // Kill the hosting server, which doesn't have meta on it.
1005           cluster.killRegionServer(oldServerName);
1006           cluster.waitForRegionServerToStop(oldServerName, -1);
1007           break;
1008         }
1009         int i = cluster.getServerWithMeta();
1010         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1011         oldServerName = rs.getServerName();
1012         master.move(hri.getEncodedNameAsBytes(),
1013           Bytes.toBytes(oldServerName.getServerName()));
1014       }
1015 
1016       // You can't assign a dead region before SSH
1017       am.assign(hri, true, true);
1018       RegionState state = regionStates.getRegionState(hri);
1019       assertTrue(state.isFailedClose());
1020 
1021       // You can't unassign a dead region before SSH either
1022       am.unassign(hri, true);
1023       assertTrue(state.isFailedClose());
1024 
1025       // Enable SSH so that log can be split
1026       master.enableSSH(true);
1027 
1028       // let's check if it's assigned after it's out of transition.
1029       // no need to assign it manually, SSH should do it
1030       am.waitOnRegionToClearRegionsInTransition(hri);
1031       assertTrue(am.waitForAssignment(hri));
1032 
1033       ServerName serverName = master.getAssignmentManager().
1034         getRegionStates().getRegionServerOfRegion(hri);
1035       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
1036     } finally {
1037       if (master != null) {
1038         master.enableSSH(true);
1039       }
1040       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1041       cluster.startRegionServer();
1042     }
1043   }
1044 
1045   /**
1046    * Test force unassign/assign a region of a disabled table
1047    */
1048   @Test (timeout=60000)
1049   public void testAssignDisabledRegion() throws Exception {
1050     TableName table = TableName.valueOf("testAssignDisabledRegion");
1051     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1052     MyMaster master = null;
1053     try {
1054       HTableDescriptor desc = new HTableDescriptor(table);
1055       desc.addFamily(new HColumnDescriptor(FAMILY));
1056       admin.createTable(desc);
1057 
1058       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1059       HRegionInfo hri = new HRegionInfo(
1060         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1061       MetaTableAccessor.addRegionToMeta(meta, hri);
1062 
1063       // Assign the region
1064       master = (MyMaster)cluster.getMaster();
1065       master.assignRegion(hri);
1066       AssignmentManager am = master.getAssignmentManager();
1067       RegionStates regionStates = am.getRegionStates();
1068       assertTrue(am.waitForAssignment(hri));
1069 
1070       // Disable the table
1071       admin.disableTable(table);
1072       assertTrue(regionStates.isRegionOffline(hri));
1073 
1074       // You can't assign a disabled region
1075       am.assign(hri, true, true);
1076       assertTrue(regionStates.isRegionOffline(hri));
1077 
1078       // You can't unassign a disabled region either
1079       am.unassign(hri, true);
1080       assertTrue(regionStates.isRegionOffline(hri));
1081     } finally {
1082       TEST_UTIL.deleteTable(table);
1083     }
1084   }
1085 
1086   /**
1087    * Test offlined region is assigned by SSH
1088    */
1089   @Test (timeout=60000)
1090   public void testAssignOfflinedRegionBySSH() throws Exception {
1091     String table = "testAssignOfflinedRegionBySSH";
1092     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1093     MyMaster master = null;
1094     try {
1095       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1096       desc.addFamily(new HColumnDescriptor(FAMILY));
1097       admin.createTable(desc);
1098 
1099       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1100       HRegionInfo hri = new HRegionInfo(
1101         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1102       MetaTableAccessor.addRegionToMeta(meta, hri);
1103 
1104       // Assign the region
1105       master = (MyMaster)cluster.getMaster();
1106       master.assignRegion(hri);
1107 
1108       AssignmentManager am = master.getAssignmentManager();
1109       RegionStates regionStates = am.getRegionStates();
1110       ServerName metaServer = regionStates.getRegionServerOfRegion(
1111         HRegionInfo.FIRST_META_REGIONINFO);
1112       ServerName oldServerName = null;
1113       while (true) {
1114         assertTrue(am.waitForAssignment(hri));
1115         RegionState state = regionStates.getRegionState(hri);
1116         oldServerName = state.getServerName();
1117         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1118           // Mark the hosting server aborted, but don't actually kill it.
1119           // It doesn't have meta on it.
1120           MyRegionServer.abortedServer = oldServerName;
1121           break;
1122         }
1123         int i = cluster.getServerWithMeta();
1124         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1125         oldServerName = rs.getServerName();
1126         master.move(hri.getEncodedNameAsBytes(),
1127           Bytes.toBytes(oldServerName.getServerName()));
1128       }
1129 
1130       // Make sure the region is assigned on the dead server
1131       assertTrue(regionStates.isRegionOnline(hri));
1132       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1133 
1134       // Kill the hosting server, which doesn't have meta on it.
1135       cluster.killRegionServer(oldServerName);
1136       cluster.waitForRegionServerToStop(oldServerName, -1);
1137 
1138       ServerManager serverManager = master.getServerManager();
1139       while (!serverManager.isServerDead(oldServerName)
1140           || serverManager.getDeadServers().areDeadServersInProgress()) {
1141         Thread.sleep(100);
1142       }
1143 
1144       // Let's check if it's assigned after it's out of transition.
1145       // no need to assign it manually, SSH should do it
1146       am.waitOnRegionToClearRegionsInTransition(hri);
1147       assertTrue(am.waitForAssignment(hri));
1148 
1149       ServerName serverName = master.getAssignmentManager().
1150         getRegionStates().getRegionServerOfRegion(hri);
1151       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
1152     } finally {
1153       MyRegionServer.abortedServer = null;
1154       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1155       cluster.startRegionServer();
1156     }
1157   }
1158 
1159   /**
1160    * Test SSH waiting for extra region server for assignment
1161    */
1162   @Test (timeout=300000)
1163   public void testSSHWaitForServerToAssignRegion() throws Exception {
1164     TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion");
1165     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1166     boolean startAServer = false;
1167     try {
1168       HTableDescriptor desc = new HTableDescriptor(table);
1169       desc.addFamily(new HColumnDescriptor(FAMILY));
1170       admin.createTable(desc);
1171 
1172       HMaster master = cluster.getMaster();
1173       final ServerManager serverManager = master.getServerManager();
1174       MyLoadBalancer.countRegionServers = Integer.valueOf(
1175         serverManager.countOfRegionServers());
1176       HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1177       assertNotNull("First region should be assigned", rs);
1178       final ServerName serverName = rs.getServerName();
1179       // Wait till SSH tried to assign regions a several times
1180       int counter = MyLoadBalancer.counter.get() + 5;
1181       cluster.killRegionServer(serverName);
1182       startAServer = true;
1183       cluster.waitForRegionServerToStop(serverName, -1);
1184       while (counter > MyLoadBalancer.counter.get()) {
1185         Thread.sleep(1000);
1186       }
1187       cluster.startRegionServer();
1188       startAServer = false;
1189       // Wait till the dead server is processed by SSH
1190       TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() {
1191         @Override
1192         public boolean evaluate() throws Exception {
1193           return serverManager.isServerDead(serverName)
1194             && !serverManager.areDeadServersInProgress();
1195         }
1196       });
1197       TEST_UTIL.waitUntilAllRegionsAssigned(table, 300000);
1198 
1199       rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1200       assertTrue("First region should be re-assigned to a different server",
1201         rs != null && !serverName.equals(rs.getServerName()));
1202     } finally {
1203       MyLoadBalancer.countRegionServers = null;
1204       TEST_UTIL.deleteTable(table);
1205       if (startAServer) {
1206         cluster.startRegionServer();
1207       }
1208     }
1209   }
1210 
1211   /**
1212    * Test disabled region is ignored by SSH
1213    */
1214   @Test (timeout=60000)
1215   public void testAssignDisabledRegionBySSH() throws Exception {
1216     String table = "testAssignDisabledRegionBySSH";
1217     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1218     MyMaster master = null;
1219     try {
1220       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1221       desc.addFamily(new HColumnDescriptor(FAMILY));
1222       admin.createTable(desc);
1223 
1224       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1225       HRegionInfo hri = new HRegionInfo(
1226         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1227       MetaTableAccessor.addRegionToMeta(meta, hri);
1228 
1229       // Assign the region
1230       master = (MyMaster)cluster.getMaster();
1231       master.assignRegion(hri);
1232 
1233       AssignmentManager am = master.getAssignmentManager();
1234       RegionStates regionStates = am.getRegionStates();
1235       ServerName metaServer = regionStates.getRegionServerOfRegion(
1236         HRegionInfo.FIRST_META_REGIONINFO);
1237       ServerName oldServerName = null;
1238       while (true) {
1239         assertTrue(am.waitForAssignment(hri));
1240         RegionState state = regionStates.getRegionState(hri);
1241         oldServerName = state.getServerName();
1242         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1243           // Mark the hosting server aborted, but don't actually kill it.
1244           // It doesn't have meta on it.
1245           MyRegionServer.abortedServer = oldServerName;
1246           break;
1247         }
1248         int i = cluster.getServerWithMeta();
1249         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1250         oldServerName = rs.getServerName();
1251         master.move(hri.getEncodedNameAsBytes(),
1252           Bytes.toBytes(oldServerName.getServerName()));
1253       }
1254 
1255       // Make sure the region is assigned on the dead server
1256       assertTrue(regionStates.isRegionOnline(hri));
1257       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1258 
1259       // Disable the table now.
1260       master.disableTable(hri.getTable(), HConstants.NO_NONCE, HConstants.NO_NONCE);
1261 
1262       // Kill the hosting server, which doesn't have meta on it.
1263       cluster.killRegionServer(oldServerName);
1264       cluster.waitForRegionServerToStop(oldServerName, -1);
1265 
1266       ServerManager serverManager = master.getServerManager();
1267       while (!serverManager.isServerDead(oldServerName)
1268           || serverManager.getDeadServers().areDeadServersInProgress()) {
1269         Thread.sleep(100);
1270       }
1271 
1272       // Wait till no more RIT, the region should be offline.
1273       am.waitUntilNoRegionsInTransition(60000);
1274       assertTrue(regionStates.isRegionOffline(hri));
1275     } finally {
1276       MyRegionServer.abortedServer = null;
1277       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1278       cluster.startRegionServer();
1279     }
1280   }
1281 
1282   /**
1283    * Test that region state transition call is idempotent
1284    */
1285   @Test(timeout = 60000)
1286   public void testReportRegionStateTransition() throws Exception {
1287     String table = "testReportRegionStateTransition";
1288     try {
1289       MyRegionServer.simulateRetry = true;
1290       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1291       desc.addFamily(new HColumnDescriptor(FAMILY));
1292       admin.createTable(desc);
1293       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1294       HRegionInfo hri =
1295           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1296       MetaTableAccessor.addRegionToMeta(meta, hri);
1297       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1298       master.assignRegion(hri);
1299       AssignmentManager am = master.getAssignmentManager();
1300       am.waitForAssignment(hri);
1301       RegionStates regionStates = am.getRegionStates();
1302       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1303       // Assert the the region is actually open on the server
1304       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
1305       // Closing region should just work fine
1306       admin.disableTable(TableName.valueOf(table));
1307       assertTrue(regionStates.isRegionOffline(hri));
1308       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
1309       assertTrue(!regions.contains(hri));
1310     } finally {
1311       MyRegionServer.simulateRetry = false;
1312       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1313     }
1314   }
1315 
1316   /**
1317    * Test concurrent updates to meta when meta is not on master
1318    * @throws Exception
1319    */
1320   @Test(timeout = 30000)
1321   public void testUpdatesRemoteMeta() throws Exception {
1322     // Not for zk less assignment
1323     if (conf.getBoolean("hbase.assignment.usezk", true)) {
1324       return;
1325     }
1326     conf.setInt("hbase.regionstatestore.meta.connection", 3);
1327     final RegionStateStore rss =
1328         new RegionStateStore(new MyRegionServer(conf, new ZkCoordinatedStateManager()));
1329     rss.start();
1330     // Create 10 threads and make each do 10 puts related to region state update
1331     Thread[] th = new Thread[10];
1332     List<String> nameList = new ArrayList<String>();
1333     List<TableName> tableNameList = new ArrayList<TableName>();
1334     for (int i = 0; i < th.length; i++) {
1335       th[i] = new Thread() {
1336         @Override
1337         public void run() {
1338           HRegionInfo[] hri = new HRegionInfo[10];
1339           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
1340           for (int i = 0; i < 10; i++) {
1341             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
1342             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
1343             RegionState oldState =
1344                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
1345             rss.updateRegionState(1, newState, oldState);
1346           }
1347         }
1348       };
1349       th[i].start();
1350       nameList.add(th[i].getName());
1351     }
1352     for (int i = 0; i < th.length; i++) {
1353       th[i].join();
1354     }
1355     // Add all the expected table names in meta to tableNameList
1356     for (String name : nameList) {
1357       for (int i = 0; i < 10; i++) {
1358         tableNameList.add(TableName.valueOf(name + "_" + i));
1359       }
1360     }
1361     List<Result> metaRows = MetaTableAccessor.fullScanOfMeta(admin.getConnection());
1362     int count = 0;
1363     // Check all 100 rows are in meta
1364     for (Result result : metaRows) {
1365       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
1366         count++;
1367         if (count == 100) {
1368           break;
1369         }
1370       }
1371     }
1372     assertTrue(count == 100);
1373     rss.stop();
1374   }
1375 
1376   static class MyLoadBalancer extends StochasticLoadBalancer {
1377     // For this region, if specified, always assign to nowhere
1378     static volatile HRegionInfo controledRegion = null;
1379 
1380     static volatile Integer countRegionServers = null;
1381     static AtomicInteger counter = new AtomicInteger(0);
1382 
1383     @Override
1384     public ServerName randomAssignment(HRegionInfo regionInfo,
1385         List<ServerName> servers) {
1386       if (regionInfo.equals(controledRegion)) {
1387         return null;
1388       }
1389       return super.randomAssignment(regionInfo, servers);
1390     }
1391 
1392     @Override
1393     public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
1394         List<HRegionInfo> regions, List<ServerName> servers) {
1395       if (countRegionServers != null && services != null) {
1396         int regionServers = services.getServerManager().countOfRegionServers();
1397         if (regionServers < countRegionServers.intValue()) {
1398           // Let's wait till more region servers join in.
1399           // Before that, fail region assignments.
1400           counter.incrementAndGet();
1401           return null;
1402         }
1403       }
1404       if (regions.get(0).equals(controledRegion)) {
1405         Map<ServerName, List<HRegionInfo>> m = Maps.newHashMap();
1406         m.put(LoadBalancer.BOGUS_SERVER_NAME, regions);
1407         return m;
1408       }
1409       return super.roundRobinAssignment(regions, servers);
1410     }
1411 
1412     @Override
1413     public Map<ServerName, List<HRegionInfo>> retainAssignment(
1414         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
1415       for (HRegionInfo hri : regions.keySet()) {
1416         if (hri.equals(controledRegion)) {
1417           Map<ServerName, List<HRegionInfo>> m = Maps.newHashMap();
1418           m.put(LoadBalancer.BOGUS_SERVER_NAME, Lists.newArrayList(regions.keySet()));
1419           return m;
1420         }
1421       }
1422       return super.retainAssignment(regions, servers);
1423     }
1424   }
1425 
1426   public static class MyMaster extends HMaster {
1427     AtomicBoolean enabled = new AtomicBoolean(true);
1428 
1429     public MyMaster(Configuration conf, CoordinatedStateManager cp)
1430       throws IOException, KeeperException,
1431         InterruptedException {
1432       super(conf, cp);
1433     }
1434 
1435     @Override
1436     public boolean isServerShutdownHandlerEnabled() {
1437       return enabled.get() && super.isServerShutdownHandlerEnabled();
1438     }
1439 
1440     public void enableSSH(boolean enabled) {
1441       this.enabled.set(enabled);
1442       if (enabled) {
1443         serverManager.processQueuedDeadServers();
1444       }
1445     }
1446   }
1447 
1448   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1449     static volatile ServerName abortedServer = null;
1450     static volatile boolean simulateRetry = false;
1451 
1452     public MyRegionServer(Configuration conf, CoordinatedStateManager cp)
1453       throws IOException, KeeperException,
1454         InterruptedException {
1455       super(conf, cp);
1456     }
1457 
1458     @Override
1459     public boolean reportRegionStateTransition(TransitionCode code, long openSeqNum,
1460         HRegionInfo... hris) {
1461       if (simulateRetry) {
1462         // Simulate retry by calling the method twice
1463         super.reportRegionStateTransition(code, openSeqNum, hris);
1464         return super.reportRegionStateTransition(code, openSeqNum, hris);
1465       }
1466       return super.reportRegionStateTransition(code, openSeqNum, hris);
1467     }
1468 
1469     @Override
1470     public boolean isAborted() {
1471       return getServerName().equals(abortedServer) || super.isAborted();
1472     }
1473   }
1474 
1475   public static class MyRegionObserver extends BaseRegionObserver {
1476     // If enabled, fail all preClose calls
1477     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1478 
1479     // If enabled, stall postClose calls
1480     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1481 
1482     // If enabled, stall postOpen calls
1483     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1484 
1485     // A flag to track if postOpen is called
1486     static volatile boolean postOpenCalled = false;
1487 
1488     @Override
1489     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1490         boolean abortRequested) throws IOException {
1491       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1492     }
1493 
1494     @Override
1495     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1496         boolean abortRequested) {
1497       stallOnFlag(postCloseEnabled);
1498     }
1499 
1500     @Override
1501     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1502       postOpenCalled = true;
1503       stallOnFlag(postOpenEnabled);
1504     }
1505 
1506     private void stallOnFlag(final AtomicBoolean flag) {
1507       try {
1508         // If enabled, stall
1509         while (flag.get()) {
1510           Thread.sleep(1000);
1511         }
1512       } catch (InterruptedException ie) {
1513         Thread.currentThread().interrupt();
1514       }
1515     }
1516   }
1517 }