View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNotSame;
25  import static org.junit.Assert.assertNull;
26  import static org.junit.Assert.assertTrue;
27  import static org.junit.Assert.fail;
28  
29  import java.io.IOException;
30  import java.io.InterruptedIOException;
31  import java.util.Collection;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.concurrent.CountDownLatch;
35  
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.Abortable;
42  import org.apache.hadoop.hbase.CoordinatedStateManager;
43  import org.apache.hadoop.hbase.Coprocessor;
44  import org.apache.hadoop.hbase.CoprocessorEnvironment;
45  import org.apache.hadoop.hbase.HBaseTestingUtility;
46  import org.apache.hadoop.hbase.HColumnDescriptor;
47  import org.apache.hadoop.hbase.HConstants;
48  import org.apache.hadoop.hbase.HRegionInfo;
49  import org.apache.hadoop.hbase.HTableDescriptor;
50  import org.apache.hadoop.hbase.MasterNotRunningException;
51  import org.apache.hadoop.hbase.MetaTableAccessor;
52  import org.apache.hadoop.hbase.MiniHBaseCluster;
53  import org.apache.hadoop.hbase.RegionTransition;
54  import org.apache.hadoop.hbase.Server;
55  import org.apache.hadoop.hbase.ServerName;
56  import org.apache.hadoop.hbase.TableName;
57  import org.apache.hadoop.hbase.UnknownRegionException;
58  import org.apache.hadoop.hbase.Waiter;
59  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
60  import org.apache.hadoop.hbase.client.Admin;
61  import org.apache.hadoop.hbase.client.Connection;
62  import org.apache.hadoop.hbase.client.ConnectionFactory;
63  import org.apache.hadoop.hbase.client.Consistency;
64  import org.apache.hadoop.hbase.client.Delete;
65  import org.apache.hadoop.hbase.client.Get;
66  import org.apache.hadoop.hbase.client.HBaseAdmin;
67  import org.apache.hadoop.hbase.client.HTable;
68  import org.apache.hadoop.hbase.client.Mutation;
69  import org.apache.hadoop.hbase.client.Put;
70  import org.apache.hadoop.hbase.client.Result;
71  import org.apache.hadoop.hbase.client.ResultScanner;
72  import org.apache.hadoop.hbase.client.Scan;
73  import org.apache.hadoop.hbase.client.Table;
74  import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
75  import org.apache.hadoop.hbase.coordination.ZKSplitTransactionCoordination;
76  import org.apache.hadoop.hbase.coordination.ZkCloseRegionCoordination;
77  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
78  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
79  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
80  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
81  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
82  import org.apache.hadoop.hbase.exceptions.DeserializationException;
83  import org.apache.hadoop.hbase.executor.EventType;
84  import org.apache.hadoop.hbase.master.AssignmentManager;
85  import org.apache.hadoop.hbase.master.HMaster;
86  import org.apache.hadoop.hbase.master.RegionState;
87  import org.apache.hadoop.hbase.master.RegionState.State;
88  import org.apache.hadoop.hbase.master.RegionStates;
89  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
90  import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
91  import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController;
92  import org.apache.hadoop.hbase.security.User;
93  import org.apache.hadoop.hbase.testclassification.LargeTests;
94  import org.apache.hadoop.hbase.util.Bytes;
95  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
96  import org.apache.hadoop.hbase.util.FSUtils;
97  import org.apache.hadoop.hbase.util.HBaseFsck;
98  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
99  import org.apache.hadoop.hbase.util.PairOfSameType;
100 import org.apache.hadoop.hbase.util.Threads;
101 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
102 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
103 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
104 import org.apache.zookeeper.KeeperException;
105 import org.apache.zookeeper.KeeperException.NodeExistsException;
106 import org.apache.zookeeper.data.Stat;
107 import org.junit.After;
108 import org.junit.AfterClass;
109 import org.junit.Assert;
110 import org.junit.Before;
111 import org.junit.BeforeClass;
112 import org.junit.Test;
113 import org.junit.experimental.categories.Category;
114 
115 import com.google.protobuf.ServiceException;
116 
117 /**
118  * Like TestSplitTransaction in that we're testing {@link SplitTransactionImpl},
119  * only the tests below run against a running cluster, whereas TestSplitTransaction
120  * tests against a bare {@link HRegion}.
121  */
122 @Category(LargeTests.class)
123 @SuppressWarnings("deprecation")
124 public class TestSplitTransactionOnCluster {
125   private static final Log LOG =
126     LogFactory.getLog(TestSplitTransactionOnCluster.class);
      // Per-test admin client: created in setup() and closed in tearDown().
127   private HBaseAdmin admin = null;
      // Handle to the mini cluster; assigned in setup().
128   private MiniHBaseCluster cluster = null;
      // Server count passed to startMiniCluster() and to
      // ensureSomeNonStoppedRegionServersAvailable() before each test.
129   private static final int NB_SERVERS = 3;
      // NOTE(review): not referenced in the visible part of this file; presumably used by
      // helpers further down -- confirm.
130   private static CountDownLatch latch = new CountDownLatch(1);
      // Flags used by testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack, which polls
      // callRollBack and firstSplitCompleted and resets all three in its finally block.
131   private static volatile boolean secondSplit = false;
132   private static volatile boolean callRollBack = false;
133   private static volatile boolean firstSplitCompleted = false;
      // Value of "hbase.assignment.usezk" as read in setupOnce(); several tests branch on it.
134   private static boolean useZKForAssignment;
135 
      // Shared testing utility that owns the mini cluster and its configuration.
136   static final HBaseTestingUtility TESTING_UTIL =
137     new HBaseTestingUtility();
138 
139   static void setupOnce() throws Exception {
140     TESTING_UTIL.getConfiguration().setInt("hbase.balancer.period", 60000);
141     useZKForAssignment = TESTING_UTIL.getConfiguration().getBoolean(
142       "hbase.assignment.usezk", true);
143     TESTING_UTIL.startMiniCluster(NB_SERVERS);
144   }
145 
146   @BeforeClass public static void before() throws Exception {
147     // Use ZK for region assignment
148     TESTING_UTIL.getConfiguration().setBoolean("hbase.assignment.usezk", true);
149     setupOnce();
150   }
151 
152   @AfterClass public static void after() throws Exception {
153     TESTING_UTIL.shutdownMiniCluster();
154   }
155 
156   @Before public void setup() throws IOException {
157     TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
158     this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
159     this.cluster = TESTING_UTIL.getMiniHBaseCluster();
160   }
161 
162   @After
163   public void tearDown() throws Exception {
164     this.admin.close();
165   }
166 
167   private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) {
168     assertEquals(1, regions.size());
169     HRegionInfo hri = regions.get(0).getRegionInfo();
170     return waitOnRIT(hri);
171   }
172 
173   /**
174    * Often region has not yet fully opened.  If we try to use it -- do a move for instance -- it
175    * will fail silently if the region is not yet opened.
176    * @param hri Region to check if in Regions In Transition... wait until out of transition before
177    * returning
178    * @return Passed in <code>hri</code>
179    */
180   private HRegionInfo waitOnRIT(final HRegionInfo hri) {
181     // Close worked but we are going to open the region elsewhere.  Before going on, make sure
182     // this completes.
183     while (TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
184         getRegionStates().isRegionInTransition(hri)) {
185       LOG.info("Waiting on region in transition: " +
186         TESTING_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates().
187           getRegionTransitionState(hri));
188       Threads.sleep(10);
189     }
190     return hri;
191   }
192 
193   @Test(timeout = 60000)
194   public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
195     final TableName tableName =
196         TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
197 
198     if (!useZKForAssignment) {
199       // This test doesn't apply if not using ZK for assignment
200       return;
201     }
202 
203     try {
204       // Create table then get the single region for our new table.
205       HTable t = createTableAndWait(tableName, Bytes.toBytes("cf"));
206       final List<HRegion> regions = cluster.getRegions(tableName);
207       HRegionInfo hri = getAndCheckSingleTableRegion(regions);
208       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
209         .getRegionName());
210       final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
211       insertData(tableName, admin, t);
212       t.close();
213 
214       // Turn off balancer so it doesn't cut in and mess up our placements.
215       this.admin.setBalancerRunning(false, true);
216       // Turn off the meta scanner so it don't remove parent on us.
217       cluster.getMaster().setCatalogJanitorEnabled(false);
218 
219       // find a splittable region
220       final HRegion region = findSplittableRegion(regions);
221       assertTrue("not able to find a splittable region", region != null);
222       MockedCoordinatedStateManager cp = new MockedCoordinatedStateManager();
223       cp.initialize(regionServer, region);
224       cp.start();
225       regionServer.csm = cp;
226 
227       new Thread() {
228         @Override
229         public void run() {
230           SplitTransaction st = null;
231           st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
232           try {
233             st.prepare();
234             st.execute(regionServer, regionServer);
235           } catch (IOException e) {
236 
237           }
238         }
239       }.start();
240       for (int i = 0; !callRollBack && i < 100; i++) {
241         Thread.sleep(100);
242       }
243       assertTrue("Waited too long for rollback", callRollBack);
244       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
245       try {
246         secondSplit = true;
247         // make region splittable
248         region.initialize();
249         st.prepare();
250         st.execute(regionServer, regionServer);
251       } catch (IOException e) {
252         LOG.debug("Rollback started :"+ e.getMessage());
253         st.rollback(regionServer, regionServer);
254       }
255       for (int i=0; !firstSplitCompleted && i<100; i++) {
256         Thread.sleep(100);
257       }
258       assertTrue("fist split did not complete", firstSplitCompleted);
259 
260       RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
261       Map<String, RegionState> rit = regionStates.getRegionsInTransition();
262 
263       for (int i=0; rit.containsKey(hri.getTable()) && i<100; i++) {
264         Thread.sleep(100);
265       }
266       assertFalse("region still in transition", rit.containsKey(
267           rit.containsKey(hri.getTable())));
268 
269       List<Region> onlineRegions = regionServer.getOnlineRegions(tableName);
270       // Region server side split is successful.
271       assertEquals("The parent region should be splitted", 2, onlineRegions.size());
272       //Should be present in RIT
273       List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
274           .getRegionStates().getRegionsOfTable(tableName);
275       // Master side should also reflect the same
276       assertEquals("No of regions in master", 2, regionsOfTable.size());
277     } finally {
278       admin.setBalancerRunning(true, false);
279       secondSplit = false;
280       firstSplitCompleted = false;
281       callRollBack = false;
282       cluster.getMaster().setCatalogJanitorEnabled(true);
283       TESTING_UTIL.deleteTable(tableName);
284     }
285   }
286 
287   @Test(timeout = 60000)
288   public void testRITStateForRollback() throws Exception {
289     final TableName tableName =
290         TableName.valueOf("testRITStateForRollback");
291     try {
292       // Create table then get the single region for our new table.
293       Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
294       final List<HRegion> regions = cluster.getRegions(tableName);
295       final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
296       insertData(tableName, admin, t);
297       t.close();
298 
299       // Turn off balancer so it doesn't cut in and mess up our placements.
300       this.admin.setBalancerRunning(false, true);
301       // Turn off the meta scanner so it don't remove parent on us.
302       cluster.getMaster().setCatalogJanitorEnabled(false);
303 
304       // find a splittable region
305       final HRegion region = findSplittableRegion(regions);
306       assertTrue("not able to find a splittable region", region != null);
307 
308       // install region co-processor to fail splits
309       region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
310         Coprocessor.PRIORITY_USER, region.getBaseConf());
311 
312       // split async
313       this.admin.split(region.getRegionInfo().getRegionName(), new byte[] {42});
314 
315       // we have to wait until the SPLITTING state is seen by the master
316       FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
317           .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
318       assertNotNull(observer);
319       observer.latch.await();
320 
321       LOG.info("Waiting for region to come out of RIT");
322       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
323         @Override
324         public boolean evaluate() throws Exception {
325           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
326           Map<String, RegionState> rit = regionStates.getRegionsInTransition();
327           return (rit.size() == 0);
328         }
329       });
330     } finally {
331       admin.setBalancerRunning(true, false);
332       cluster.getMaster().setCatalogJanitorEnabled(true);
333       TESTING_UTIL.deleteTable(tableName);
334     }
335   }
336   @Test(timeout = 60000)
337   public void testSplitFailedCompactionAndSplit() throws Exception {
338     final TableName tableName = TableName.valueOf("testSplitFailedCompactionAndSplit");
339     Configuration conf = TESTING_UTIL.getConfiguration();
340     try {
341       HBaseAdmin admin = new HBaseAdmin(conf);
342       // Create table then get the single region for our new table.
343       HTableDescriptor htd = new HTableDescriptor(tableName);
344       byte[] cf = Bytes.toBytes("cf");
345       htd.addFamily(new HColumnDescriptor(cf));
346       admin.createTable(htd);
347 
348       for (int i = 0; cluster.getRegions(tableName).size() == 0 && i < 100; i++) {
349         Thread.sleep(100);
350       }
351       assertEquals(1, cluster.getRegions(tableName).size());
352 
353       HRegion region = cluster.getRegions(tableName).get(0);
354       Store store = region.getStore(cf);
355       int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
356       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
357 
358       Table t = new HTable(conf, tableName);
359       // insert data
360       insertData(tableName, admin, t);
361       insertData(tableName, admin, t);
362 
363       int fileNum = store.getStorefiles().size();
364       // 0, Compaction Request
365       store.triggerMajorCompaction();
366       CompactionContext cc = store.requestCompaction();
367       assertNotNull(cc);
368       // 1, A timeout split
369       // 1.1 close region
370       assertEquals(2, region.close(false).get(cf).size());
371       // 1.2 rollback and Region initialize again
372       region.initialize();
373 
374       // 2, Run Compaction cc
375       assertFalse(region.compact(cc, store, NoLimitCompactionThroughputController.INSTANCE));
376       assertTrue(fileNum > store.getStorefiles().size());
377 
378       // 3, Split
379       SplitTransaction st = new SplitTransactionImpl(region, Bytes.toBytes("row3"));
380       assertTrue(st.prepare());
381       st.execute(regionServer, regionServer);
382       LOG.info("Waiting for region to come out of RIT");
383       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
384         @Override
385         public boolean evaluate() throws Exception {
386           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
387           Map<String, RegionState> rit = regionStates.getRegionsInTransition();
388           return (rit.size() == 0);
389         }
390       });
391       assertEquals(2, cluster.getRegions(tableName).size());
392     } finally {
393       TESTING_UTIL.deleteTable(tableName);
394     }
395   }
396 
397   public static class FailingSplitRegionObserver extends BaseRegionObserver {
398     volatile CountDownLatch latch;
399     volatile CountDownLatch postSplit;
400     @Override
401     public void start(CoprocessorEnvironment e) throws IOException {
402       latch = new CountDownLatch(1);
403       postSplit = new CountDownLatch(1);
404     }
405     @Override
406     public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
407         byte[] splitKey, List<Mutation> metaEntries) throws IOException {
408       latch.countDown();
409       LOG.info("Causing rollback of region split");
410       throw new IOException("Causing rollback of region split");
411     }
412     @Override
413     public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
414         throws IOException {
415       postSplit.countDown();
416       LOG.info("postCompleteSplit called");
417     }
418   }
419 
420  /**
421    * A test that intentionally has master fail the processing of the split message.
422    * Tests that the regionserver split ephemeral node gets cleaned up if it
423    * crashes and that after we process server shutdown, the daughters are up on
424    * line.
425    * @throws IOException
426    * @throws InterruptedException
427    * @throws NodeExistsException
428    * @throws KeeperException
429    * @throws DeserializationException
430    */
431   @Test (timeout = 300000) public void testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling()
432   throws IOException, InterruptedException, NodeExistsException, KeeperException,
433       DeserializationException, ServiceException {
434     final TableName tableName =
435       TableName.valueOf("testRSSplitEphemeralsDisappearButDaughtersAreOnlinedAfterShutdownHandling");
436 
437     // Create table then get the single region for our new table.
438     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
439     List<HRegion> regions = cluster.getRegions(tableName);
440     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
441 
        // The returned index identifies the region server that is later inspected and aborted.
442     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
443 
444     // Turn off balancer so it doesn't cut in and mess up our placements.
445     this.admin.setBalancerRunning(false, true);
446     // Turn off the meta scanner so it don't remove parent on us.
447     cluster.getMaster().setCatalogJanitorEnabled(false);
448     try {
449       // Add a bit of load up into the table so splittable.
450       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
451       // Get region pre-split.
452       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
453       printOutRegions(server, "Initial regions: ");
454       int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
455       // Now, before we split, set special flag in master, a flag that has
456       // it FAIL the processing of split.
457       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
458       // Now try splitting and it should work.
459       split(hri, server, regionCount);
460 
461       String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
462         hri.getEncodedName());
463       RegionTransition rt = null;
464       Stat stats = null;
465       List<HRegion> daughters = null;
          // The znode inspection and the explicit abort below only run in ZK-assignment mode.
466       if (useZKForAssignment) {
467         daughters = checkAndGetDaughters(tableName);
468 
469         // Wait till the znode moved to SPLIT
            // Polls at most 100 times, 100 ms apart.
470         for (int i=0; i<100; i++) {
471           stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
472           rt = RegionTransition.parseFrom(ZKAssign.getData(TESTING_UTIL.getZooKeeperWatcher(),
473             hri.getEncodedName()));
474           if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) break;
475           Thread.sleep(100);
476         }
477         LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
478         assertTrue(rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
479         // Now crash the server, for ZK-less assignment, the server is auto aborted
480         cluster.abortRegionServer(tableRegionIndex);
481       }
          // NOTE(review): both helpers are defined outside this view; presumably they block until
          // the server is gone and two daughter regions are online -- confirm.
482       waitUntilRegionServerDead();
483       awaitDaughters(tableName, 2);
484       if (useZKForAssignment) {
            // Every region now online for the table must be one of the daughters seen earlier.
485         regions = cluster.getRegions(tableName);
486         for (HRegion r: regions) {
487           assertTrue(daughters.contains(r));
488         }
489 
490         // Finally assert that the ephemeral SPLIT znode was cleaned up.
491         for (int i=0; i<100; i++) {
492           // wait a bit (10s max) for the node to disappear
493           stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
494           if (stats == null) break;
495           Thread.sleep(100);
496         }
497         LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
498         assertTrue(stats == null);
499       }
500     } finally {
501       // Set this flag back.
502       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
503       cluster.getMaster().getAssignmentManager().regionOffline(hri);
504       admin.setBalancerRunning(true, false);
505       cluster.getMaster().setCatalogJanitorEnabled(true);
          // Start a new region server (one was aborted above in ZK mode), then drop the table.
506       cluster.startRegionServer();
507       t.close();
508       TESTING_UTIL.deleteTable(tableName);
509     }
510   }
511 
512   @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
513   throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
        // Flow: put a blocker in place (a CLOSING znode, or a CLOSING region state when ZK is not
        // used for assignment), issue three split requests and check that the online region count
        // stays constant, then remove the blocker and check that a split goes through.
514     final TableName tableName =
515         TableName.valueOf("testExistingZnodeBlocksSplitAndWeRollback");
516 
517     // Create table then get the single region for our new table.
518     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
519     List<HRegion> regions = cluster.getRegions(tableName);
520     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
521 
522     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
523 
524     RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
525 
526     // Turn off balancer so it doesn't cut in and mess up our placements.
527     this.admin.setBalancerRunning(false, true);
528     // Turn off the meta scanner so it don't remove parent on us.
529     cluster.getMaster().setCatalogJanitorEnabled(false);
530     try {
531       // Add a bit of load up into the table so splittable.
532       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
533       // Get region pre-split.
534       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
535       printOutRegions(server, "Initial regions: ");
536       int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
537       // Insert into zk a blocking znode, a znode of same name as region
538       // so it gets in way of our splitting.
539       ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
540       if (useZKForAssignment) {
541         ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
542           hri, fakedServer);
543       } else {
            // Without ZK the equivalent blocker is the master-side region state.
544         regionStates.updateRegionState(hri, RegionState.State.CLOSING);
545       }
546       // Now try splitting.... should fail.  And each should successfully
547       // rollback.
548       this.admin.split(hri.getRegionNameAsString());
549       this.admin.split(hri.getRegionNameAsString());
550       this.admin.split(hri.getRegionNameAsString());
551       // Wait around a while and assert count of regions remains constant.
          // Checks the count 10 times, sleeping 100 ms before each check.
552       for (int i = 0; i < 10; i++) {
553         Thread.sleep(100);
554         assertEquals(regionCount, ProtobufUtil.getOnlineRegions(
555           server.getRSRpcServices()).size());
556       }
557       if (useZKForAssignment) {
558         // Now clear the zknode
559         ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
560           hri, fakedServer);
561       } else {
            // Undo the CLOSING state set above by marking the region online on its server.
562         regionStates.regionOnline(hri, server.getServerName());
563       }
564       // Now try splitting and it should work.
565       split(hri, server, regionCount);
566       // Get daughters
567       checkAndGetDaughters(tableName);
568       // OK, so split happened after we cleared the blocking node.
569     } finally {
          // Re-enable what was switched off above and release the table handle.
570       admin.setBalancerRunning(true, false);
571       cluster.getMaster().setCatalogJanitorEnabled(true);
572       t.close();
573     }
574   }
575 
576   /**
577    * Test that if daughter split on us, we won't do the shutdown handler fixup
578    * just because we can't find the immediate daughter of an offlined parent.
579    * @throws IOException
580    * @throws InterruptedException
581    */
582   @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
583   throws IOException, InterruptedException, ServiceException {
584     final TableName tableName =
585         TableName.valueOf("testShutdownFixupWhenDaughterHasSplit");
586 
587     // Create table then get the single region for our new table.
588     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
589     List<HRegion> regions = cluster.getRegions(tableName);
590     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
591 
592     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
593 
594     // Turn off balancer so it doesn't cut in and mess up our placements.
595     this.admin.setBalancerRunning(false, true);
596     // Turn off the meta scanner so it don't remove parent on us.
597     cluster.getMaster().setCatalogJanitorEnabled(false);
598     try {
599       // Add a bit of load up into the table so splittable.
600       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
601       // Get region pre-split.
602       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
603       printOutRegions(server, "Initial regions: ");
604       int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
605       // Now split.
606       split(hri, server, regionCount);
607       // Get daughters
608       List<HRegion> daughters = checkAndGetDaughters(tableName);
609       // Now split one of the daughters.
610       regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
611       HRegionInfo daughter = daughters.get(0).getRegionInfo();
612       LOG.info("Daughter we are going to split: " + daughter);
613       // Compact first to ensure we have cleaned up references -- else the split
614       // will fail.
615       this.admin.compact(daughter.getRegionName());
          // Re-read the table's regions and pick the HRegion whose info equals the chosen daughter.
616       daughters = cluster.getRegions(tableName);
617       HRegion daughterRegion = null;
618       for (HRegion r: daughters) {
619         if (r.getRegionInfo().equals(daughter)) {
620           daughterRegion = r;
621           LOG.info("Found matching HRI: " + daughterRegion);
622           break;
623         }
624       }
625       assertTrue(daughterRegion != null);
          // Poll up to 100 times, 100 ms apart, until the daughter has no references left.
626       for (int i=0; i<100; i++) {
627         if (!daughterRegion.hasReferences()) break;
628         Threads.sleep(100);
629       }
630       assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
631       LOG.info("Daughter hri before split (has been compacted): " + daughter);
632       split(daughter, server, regionCount);
633       // Get list of daughters
634       daughters = cluster.getRegions(tableName);
635       for (HRegion d: daughters) {
636         LOG.info("Regions before crash: " + d);
637       }
638       // Now crash the server
639       cluster.abortRegionServer(tableRegionIndex);
640       waitUntilRegionServerDead();
641       awaitDaughters(tableName, daughters.size());
642       // Assert daughters are online and ONLY the original daughters -- that
643       // fixup didn't insert one during server shutdown recover.
644       regions = cluster.getRegions(tableName);
          // NOTE(review): this loop iterates 'daughters' (the list captured before the crash)
          // although the message says "after crash"; 'regions' may have been intended -- confirm.
645       for (HRegion d: daughters) {
646         LOG.info("Regions after crash: " + d);
647       }
648       assertEquals(daughters.size(), regions.size());
649       for (HRegion r: regions) {
650         LOG.info("Regions post crash " + r);
651         assertTrue("Missing region post crash " + r, daughters.contains(r));
652       }
653     } finally {
          // Re-enable what was switched off above and release the table handle.
654       admin.setBalancerRunning(true, false);
655       cluster.getMaster().setCatalogJanitorEnabled(true);
656       t.close();
657     }
658   }
659 
660   @Test(timeout = 180000)
661   public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
662     TableName userTableName =
663         TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
664     HTableDescriptor htd = new HTableDescriptor(userTableName);
665     HColumnDescriptor hcd = new HColumnDescriptor("col");
666     htd.addFamily(hcd);
667     admin.createTable(htd);
668     Table table = new HTable(TESTING_UTIL.getConfiguration(), userTableName);
669     try {
670       for (int i = 0; i <= 5; i++) {
671         String row = "row" + i;
672         Put p = new Put(row.getBytes());
673         String val = "Val" + i;
674         p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
675         table.put(p);
676         admin.flush(userTableName.getName());
677         Delete d = new Delete(row.getBytes());
678         // Do a normal delete
679         table.delete(d);
680         admin.flush(userTableName.getName());
681       }
682       admin.majorCompact(userTableName.getName());
683       List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
684           .getMaster().getAssignmentManager().getRegionStates()
685           .getRegionsOfTable(userTableName);
686       HRegionInfo hRegionInfo = regionsOfTable.get(0);
687       Put p = new Put("row6".getBytes());
688       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
689       table.put(p);
690       p = new Put("row7".getBytes());
691       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
692       table.put(p);
693       p = new Put("row8".getBytes());
694       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
695       table.put(p);
696       admin.flush(userTableName.getName());
697       admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
698       regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
699           .getAssignmentManager().getRegionStates()
700           .getRegionsOfTable(userTableName);
701 
702       while (regionsOfTable.size() != 2) {
703         Thread.sleep(2000);
704         regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
705             .getAssignmentManager().getRegionStates()
706             .getRegionsOfTable(userTableName);
707       }
708       Assert.assertEquals(2, regionsOfTable.size());
709       Scan s = new Scan();
710       ResultScanner scanner = table.getScanner(s);
711       int mainTableCount = 0;
712       for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
713         mainTableCount++;
714       }
715       Assert.assertEquals(3, mainTableCount);
716     } finally {
717       table.close();
718     }
719   }
720 
721   /**
722    * Noop Abortable implementation used below in tests.
723    */
724   static class UselessTestAbortable implements Abortable {
725     boolean aborted = false;
726     @Override
727     public void abort(String why, Throwable e) {
728       LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
729       aborted = true;
730     }
731 
732     @Override
733     public boolean isAborted() {
734       return this.aborted;
735     }
736   }
737 
738   /**
739    * Verifies HBASE-5806.  When splitting is partially done and the master goes down
740    * when the SPLIT node is in either SPLIT or SPLITTING state.
741    *
742    * @throws IOException
743    * @throws InterruptedException
744    * @throws NodeExistsException
745    * @throws KeeperException
746    * @throws DeserializationException
747    */
748   @Test(timeout = 400000)
749   public void testMasterRestartWhenSplittingIsPartial()
750       throws IOException, InterruptedException, NodeExistsException,
751       KeeperException, DeserializationException, ServiceException {
752     final TableName tableName = TableName.valueOf("testMasterRestartWhenSplittingIsPartial");
753 
754     if (!useZKForAssignment) {
755       // This test doesn't apply if not using ZK for assignment
756       return;
757     }
758 
759     // Create table then get the single region for our new table.
760     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
761     List<HRegion> regions = cluster.getRegions(tableName);
762     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
763 
764     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
765 
766     // Turn off balancer so it doesn't cut in and mess up our placements.
767     this.admin.setBalancerRunning(false, true);
768     // Turn off the meta scanner so it don't remove parent on us.
769     cluster.getMaster().setCatalogJanitorEnabled(false);
770     ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
771       "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
772     try {
773       // Add a bit of load up into the table so splittable.
774       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
775       // Get region pre-split.
776       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
777       printOutRegions(server, "Initial regions: ");
778       // Now, before we split, set special flag in master, a flag that has
779       // it FAIL the processing of split.
780       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = true;
781       // Now try splitting and it should work.
782 
783       this.admin.split(hri.getRegionNameAsString());
784       checkAndGetDaughters(tableName);
785       // Assert the ephemeral node is up in zk.
786       String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
787       Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
788       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
789           + stats);
790       byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
791       RegionTransition rtd = RegionTransition.parseFrom(bytes);
792       // State could be SPLIT or SPLITTING.
793       assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
794           || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
795 
796       // abort and wait for new master.
797       MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
798 
799       this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
800 
801       // Update the region to be offline and split, so that HRegionInfo#equals
802       // returns true in checking rebuilt region states map.
803       hri.setOffline(true);
804       hri.setSplit(true);
805       ServerName regionServerOfRegion = master.getAssignmentManager()
806         .getRegionStates().getRegionServerOfRegion(hri);
807       assertTrue(regionServerOfRegion != null);
808 
809       // Remove the block so that split can move ahead.
810       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
811       String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
812       Stat stat = new Stat();
813       byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
814       // ZKUtil.create
815       for (int i=0; data != null && i<60; i++) {
816         Thread.sleep(1000);
817         data = ZKUtil.getDataNoWatch(zkw, node, stat);
818       }
819       assertNull("Waited too long for ZK node to be removed: "+node, data);
820       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
821       assertTrue("Split parent should be in SPLIT state",
822         regionStates.isRegionInState(hri, State.SPLIT));
823       regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
824       assertTrue(regionServerOfRegion == null);
825     } finally {
826       // Set this flag back.
827       AssignmentManager.TEST_SKIP_SPLIT_HANDLING = false;
828       admin.setBalancerRunning(true, false);
829       cluster.getMaster().setCatalogJanitorEnabled(true);
830       t.close();
831       zkw.close();
832     }
833   }
834 
835   /**
836    * Verifies HBASE-5806.  Here the case is that splitting is completed but before the
837    * CJ could remove the parent region the master is killed and restarted.
838    * @throws IOException
839    * @throws InterruptedException
840    * @throws NodeExistsException
841    * @throws KeeperException
842    */
843   @Test (timeout = 300000)
844   public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
845       throws IOException, InterruptedException, NodeExistsException,
846       KeeperException, ServiceException {
847     final TableName tableName = TableName
848         .valueOf("testMasterRestartAtRegionSplitPendingCatalogJanitor");
849 
850     // Create table then get the single region for our new table.
851     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
852     List<HRegion> regions = cluster.getRegions(tableName);
853     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
854 
855     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
856 
857     // Turn off balancer so it doesn't cut in and mess up our placements.
858     this.admin.setBalancerRunning(false, true);
859     // Turn off the meta scanner so it don't remove parent on us.
860     cluster.getMaster().setCatalogJanitorEnabled(false);
861     ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
862       "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
863     try {
864       // Add a bit of load up into the table so splittable.
865       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
866       // Get region pre-split.
867       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
868       printOutRegions(server, "Initial regions: ");
869 
870       this.admin.split(hri.getRegionNameAsString());
871       checkAndGetDaughters(tableName);
872       // Assert the ephemeral node is up in zk.
873       String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
874       Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
875       LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
876           + stats);
877       String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
878       Stat stat = new Stat();
879       byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
880       // ZKUtil.create
881       for (int i=0; data != null && i<60; i++) {
882         Thread.sleep(1000);
883         data = ZKUtil.getDataNoWatch(zkw, node, stat);
884       }
885       assertNull("Waited too long for ZK node to be removed: "+node, data);
886 
887       MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
888 
889       this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
890 
891       // Update the region to be offline and split, so that HRegionInfo#equals
892       // returns true in checking rebuilt region states map.
893       hri.setOffline(true);
894       hri.setSplit(true);
895       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
896       assertTrue("Split parent should be in SPLIT state",
897         regionStates.isRegionInState(hri, State.SPLIT));
898       ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
899       assertTrue(regionServerOfRegion == null);
900     } finally {
901       this.admin.setBalancerRunning(true, false);
902       cluster.getMaster().setCatalogJanitorEnabled(true);
903       t.close();
904       zkw.close();
905     }
906   }
907 
  /**
   * While transitioning the znode from RS_ZK_REGION_SPLITTING to RS_ZK_REGION_SPLITTING
   * during a region split, if ZooKeeper went down the split always failed for the region.
   * HBASE-6088 fixes this scenario. This test checks that the znode (if it was created) is
   * deleted on rollback.
   *
   * <p>All of the work is done by {@code testSplitBeforeSettingSplittingInZKInternals()};
   * this method only carries the JUnit annotation and timeout.
   *
   * @throws Exception whatever the delegated test body throws
   * @throws InterruptedException listed in the signature; already covered by {@code Exception}
   * @throws KeeperException listed in the signature; already covered by {@code Exception}
   */
  @Test(timeout = 60000)
  public void testSplitBeforeSettingSplittingInZK() throws Exception,
      InterruptedException, KeeperException {
    testSplitBeforeSettingSplittingInZKInternals();
  }
924 
925   @Test(timeout = 60000)
926   public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
927     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
928     final TableName tableName =
929         TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
930     // Create table then get the single region for our new table.
931     Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
932     List<HRegion> regions = null;
933     try {
934       regions = cluster.getRegions(tableName);
935       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
936         .getRegionName());
937       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
938       insertData(tableName, admin, t);
939       // Turn off balancer so it doesn't cut in and mess up our placements.
940       admin.setBalancerRunning(false, true);
941       // Turn off the meta scanner so it don't remove parent on us.
942       cluster.getMaster().setCatalogJanitorEnabled(false);
943       boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
944         tableName);
945       assertEquals("The specified table should present.", true, tableExists);
946       final HRegion region = findSplittableRegion(regions);
947       assertTrue("not able to find a splittable region", region != null);
948       SplitTransactionImpl st = new SplitTransactionImpl(region, Bytes.toBytes("row2"));
949       try {
950         st.prepare();
951         st.createDaughters(regionServer, regionServer, null);
952       } catch (IOException e) {
953 
954       }
955       tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
956         tableName);
957       assertEquals("The specified table should present.", true, tableExists);
958       Map<String, RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
959           .getRegionsInTransition();
960       assertTrue(rit.size() == 3);
961       cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
962       cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
963       cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
964       rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
965       assertTrue(rit.size() == 0);
966     }
967     finally {
968       admin.setBalancerRunning(true, false);
969       cluster.getMaster().setCatalogJanitorEnabled(true);
970       t.close();
971       TESTING_UTIL.deleteTable(tableName);
972     }
973   }
974 
975   @Test
976   public void testSplitWithRegionReplicas() throws Exception {
977     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
978     final TableName tableName =
979         TableName.valueOf("foobar");
980     HTableDescriptor htd = TESTING_UTIL.createTableDescriptor("foobar");
981     htd.setRegionReplication(2);
982     htd.addCoprocessor(SlowMeCopro.class.getName());
983     // Create table then get the single region for our new table.
984     HTable t = TESTING_UTIL.createTable(htd, new byte[][]{Bytes.toBytes("cf")},
985         TESTING_UTIL.getConfiguration());
986     int count;
987     List<HRegion> oldRegions;
988     do {
989       oldRegions = cluster.getRegions(tableName);
990       Thread.sleep(10);
991     } while (oldRegions.size() != 2);
992     for (HRegion h : oldRegions) LOG.debug("OLDREGION " + h.getRegionInfo());
993     try {
994       int regionServerIndex = cluster.getServerWith(oldRegions.get(0).getRegionInfo()
995         .getRegionName());
996       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
997       insertData(tableName, admin, t);
998       // Turn off balancer so it doesn't cut in and mess up our placements.
999       admin.setBalancerRunning(false, true);
1000       // Turn off the meta scanner so it don't remove parent on us.
1001       cluster.getMaster().setCatalogJanitorEnabled(false);
1002       boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1003           tableName);
1004       assertEquals("The specified table should be present.", true, tableExists);
1005       final HRegion region = findSplittableRegion(oldRegions);
1006       regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
1007       regionServer = cluster.getRegionServer(regionServerIndex);
1008       assertTrue("not able to find a splittable region", region != null);
1009       String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1010           region.getRegionInfo().getEncodedName());
1011       regionServer.getZooKeeper().sync(node);
1012       SplitTransactionImpl st = new SplitTransactionImpl(region, Bytes.toBytes("row2"));
1013       try {
1014         st.prepare();
1015         st.execute(regionServer, regionServer);
1016       } catch (IOException e) {
1017         e.printStackTrace();
1018         fail("Split execution should have succeeded with no exceptions thrown " + e);
1019       }
1020       //TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
1021       List<HRegion> newRegions;
1022       do {
1023         newRegions = cluster.getRegions(tableName);
1024         for (HRegion h : newRegions) LOG.debug("NEWREGION " + h.getRegionInfo());
1025         Thread.sleep(1000);
1026       } while ((newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
1027           || newRegions.size() != 4);
1028       tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1029           tableName);
1030       assertEquals("The specified table should be present.", true, tableExists);
1031       // exists works on stale and we see the put after the flush
1032       byte[] b1 = "row1".getBytes();
1033       Get g = new Get(b1);
1034       g.setConsistency(Consistency.STRONG);
1035       // The following GET will make a trip to the meta to get the new location of the 1st daughter
1036       // In the process it will also get the location of the replica of the daughter (initially
1037       // pointing to the parent's replica)
1038       Result r = t.get(g);
1039       Assert.assertFalse(r.isStale());
1040       LOG.info("exists stale after flush done");
1041 
1042       SlowMeCopro.getCdl().set(new CountDownLatch(1));
1043       g = new Get(b1);
1044       g.setConsistency(Consistency.TIMELINE);
1045       // This will succeed because in the previous GET we get the location of the replica
1046       r = t.get(g);
1047       Assert.assertTrue(r.isStale());
1048       SlowMeCopro.getCdl().get().countDown();
1049     } finally {
1050       SlowMeCopro.getCdl().get().countDown();
1051       admin.setBalancerRunning(true, false);
1052       cluster.getMaster().setCatalogJanitorEnabled(true);
1053       t.close();
1054     }
1055   }
1056 
1057   private void insertData(final TableName tableName, HBaseAdmin admin, Table t) throws IOException,
1058       InterruptedException {
1059     Put p = new Put(Bytes.toBytes("row1"));
1060     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
1061     t.put(p);
1062     p = new Put(Bytes.toBytes("row2"));
1063     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
1064     t.put(p);
1065     p = new Put(Bytes.toBytes("row3"));
1066     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
1067     t.put(p);
1068     p = new Put(Bytes.toBytes("row4"));
1069     p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
1070     t.put(p);
1071     admin.flush(tableName);
1072   }
1073 
1074   /**
1075    * If a table has regions that have no store files in a region, they should split successfully
1076    * into two regions with no store files.
1077    */
1078   @Test(timeout = 60000)
1079   public void testSplitRegionWithNoStoreFiles()
1080       throws Exception {
1081     final TableName tableName =
1082         TableName.valueOf("testSplitRegionWithNoStoreFiles");
1083     // Create table then get the single region for our new table.
1084     createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
1085     List<HRegion> regions = cluster.getRegions(tableName);
1086     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
1087     ensureTableRegionNotOnSameServerAsMeta(admin, hri);
1088     int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1089       .getRegionName());
1090     HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1091     // Turn off balancer so it doesn't cut in and mess up our placements.
1092     this.admin.setBalancerRunning(false, true);
1093     // Turn off the meta scanner so it don't remove parent on us.
1094     cluster.getMaster().setCatalogJanitorEnabled(false);
1095     try {
1096       // Precondition: we created a table with no data, no store files.
1097       printOutRegions(regionServer, "Initial regions: ");
1098       Configuration conf = cluster.getConfiguration();
1099       HBaseFsck.debugLsr(conf, new Path("/"));
1100       Path rootDir = FSUtils.getRootDir(conf);
1101       FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
1102       Map<String, Path> storefiles =
1103           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1104       assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
1105 
1106       // find a splittable region.  Refresh the regions list
1107       regions = cluster.getRegions(tableName);
1108       final HRegion region = findSplittableRegion(regions);
1109       assertTrue("not able to find a splittable region", region != null);
1110 
1111       // Now split.
1112       SplitTransactionImpl st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
1113       try {
1114         st.prepare();
1115         st.execute(regionServer, regionServer);
1116       } catch (IOException e) {
1117         fail("Split execution should have succeeded with no exceptions thrown");
1118       }
1119 
1120       // Postcondition: split the table with no store files into two regions, but still have not
1121       // store files
1122       List<HRegion> daughters = cluster.getRegions(tableName);
1123       assertTrue(daughters.size() == 2);
1124 
1125       // check dirs
1126       HBaseFsck.debugLsr(conf, new Path("/"));
1127       Map<String, Path> storefilesAfter =
1128           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1129       assertEquals("Expected nothing but found " + storefilesAfter.toString(),
1130           storefilesAfter.size(), 0);
1131 
1132       hri = region.getRegionInfo(); // split parent
1133       AssignmentManager am = cluster.getMaster().getAssignmentManager();
1134       RegionStates regionStates = am.getRegionStates();
1135       long start = EnvironmentEdgeManager.currentTime();
1136       while (!regionStates.isRegionInState(hri, State.SPLIT)) {
1137         assertFalse("Timed out in waiting split parent to be in state SPLIT",
1138           EnvironmentEdgeManager.currentTime() - start > 60000);
1139         Thread.sleep(500);
1140       }
1141 
1142       // We should not be able to assign it again
1143       am.assign(hri, true, true);
1144       assertFalse("Split region can't be assigned",
1145         regionStates.isRegionInTransition(hri));
1146       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1147 
1148       // We should not be able to unassign it either
1149       am.unassign(hri, true, null);
1150       assertFalse("Split region can't be unassigned",
1151         regionStates.isRegionInTransition(hri));
1152       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1153     } finally {
1154       admin.setBalancerRunning(true, false);
1155       cluster.getMaster().setCatalogJanitorEnabled(true);
1156     }
1157   }
1158 
1159   @Test(timeout = 180000)
1160   public void testSplitHooksBeforeAndAfterPONR() throws Exception {
1161     TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1162     TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1163     HColumnDescriptor hcd = new HColumnDescriptor("cf");
1164 
1165     HTableDescriptor desc = new HTableDescriptor(firstTable);
1166     desc.addCoprocessor(MockedRegionObserver.class.getName());
1167     desc.addFamily(hcd);
1168     admin.createTable(desc);
1169     TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1170 
1171     desc = new HTableDescriptor(secondTable);
1172     desc.addFamily(hcd);
1173     admin.createTable(desc);
1174     TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1175 
1176     List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1177     List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1178 
1179     // Check that both tables actually have regions.
1180     if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1181       fail("Each table should have at least one region.");
1182     }
1183     ServerName serverName = cluster.getServerHoldingRegion(firstTable,
1184       firstTableRegions.get(0).getRegionInfo().getRegionName());
1185     admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1186       Bytes.toBytes(serverName.getServerName()));
1187     Table table1 = null;
1188     Table table2 = null;
1189     try {
1190       table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1191       table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1192       insertData(firstTable, admin, table1);
1193       insertData(secondTable, admin, table2);
1194       admin.split(firstTable, "row2".getBytes());
1195       firstTableRegions = cluster.getRegions(firstTable);
1196       while (firstTableRegions.size() != 2) {
1197         Thread.sleep(1000);
1198         firstTableRegions = cluster.getRegions(firstTable);
1199       }
1200       assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1201       secondTableRegions = cluster.getRegions(secondTable);
1202       assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1203     } finally {
1204       if (table1 != null) {
1205         table1.close();
1206       }
1207       if (table2 != null) {
1208         table2.close();
1209       }
1210       TESTING_UTIL.deleteTable(firstTable);
1211       TESTING_UTIL.deleteTable(secondTable);
1212     }
1213   }
1214 
1215   private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1216     final TableName tableName = TableName.valueOf("testSplitBeforeSettingSplittingInZK");
1217     try {
1218       // Create table then get the single region for our new table.
1219       createTableAndWait(tableName, Bytes.toBytes("cf"));
1220 
1221       List<HRegion> regions = awaitTableRegions(tableName);
1222       assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1223 
1224       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1225         .getRegionName());
1226       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1227       final HRegion region = findSplittableRegion(regions);
1228       assertTrue("not able to find a splittable region", region != null);
1229       SplitTransactionImpl st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1230         @Override
1231         public PairOfSameType<Region> stepsBeforePONR(final Server server,
1232             final RegionServerServices services, boolean testing) throws IOException {
1233           throw new SplittingNodeCreationFailedException ();
1234         }
1235       };
1236       String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1237           region.getRegionInfo().getEncodedName());
1238       regionServer.getZooKeeper().sync(node);
1239       for (int i = 0; i < 100; i++) {
1240         // We expect the znode to be deleted by this time. Here the
1241         // znode could be in OPENED state and the
1242         // master has not yet deleted the znode.
1243         if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1244           Thread.sleep(100);
1245         }
1246       }
1247       try {
1248         st.prepare();
1249         st.execute(regionServer, regionServer);
1250       } catch (IOException e) {
1251         // check for the specific instance in case the Split failed due to the
1252         // existence of the znode in OPENED state.
1253         // This will at least make the test to fail;
1254         assertTrue("Should be instance of CreateSplittingNodeFailedException",
1255             e instanceof SplittingNodeCreationFailedException );
1256         node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1257             region.getRegionInfo().getEncodedName());
1258         {
1259           assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1260         }
1261         assertTrue(st.rollback(regionServer, regionServer));
1262         assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1263       }
1264     } finally {
1265       TESTING_UTIL.deleteTable(tableName);
1266     }
1267   }
1268 
1269   @Test
1270   public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1271       throws Exception {
1272     final TableName tableName =
1273         TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1274     try {
1275       HTableDescriptor htd = new HTableDescriptor(tableName);
1276       htd.addFamily(new HColumnDescriptor("f"));
1277       htd.addFamily(new HColumnDescriptor("i_f"));
1278       htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1279       admin.createTable(htd);
1280       List<HRegion> regions = awaitTableRegions(tableName);
1281       HRegion region = regions.get(0);
1282       for(int i = 3;i<9;i++) {
1283         Put p = new Put(Bytes.toBytes("row"+i));
1284         p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1285         p.add(Bytes.toBytes("i_f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1286         region.put(p);
1287       }
1288       region.flush(true);
1289       Store store = region.getStore(Bytes.toBytes("f"));
1290       Collection<StoreFile> storefiles = store.getStorefiles();
1291       assertEquals(storefiles.size(), 1);
1292       assertFalse(region.hasReferences());
1293       Path referencePath =
1294           region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1295             storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1296       assertNull(referencePath);
1297       referencePath =
1298           region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
1299             storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1300       assertNotNull(referencePath);
1301     } finally {
1302       TESTING_UTIL.deleteTable(tableName);
1303     }
1304   }
1305   
1306   @Test(timeout = 120000)
1307   public void testFailedSplit() throws Exception {
1308     TableName tableName = TableName.valueOf("testFailedSplit");
1309     byte[] colFamily = Bytes.toBytes("info");
1310     TESTING_UTIL.createTable(tableName, colFamily);
1311     Connection connection = ConnectionFactory.createConnection(TESTING_UTIL.getConfiguration());
1312     HTable table = (HTable) connection.getTable(tableName);
1313     try {
1314       TESTING_UTIL.loadTable(table, colFamily);
1315       List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1316       assertTrue(regions.size() == 1);
1317       final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1318       actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1319         Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1320 
1321       // The following split would fail.
1322       admin.split(tableName);
1323       FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1324           .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1325       assertNotNull(observer);
1326       observer.latch.await();
1327       observer.postSplit.await();
1328       LOG.info("Waiting for region to come out of RIT: " + actualRegion);
1329       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1330         @Override
1331         public boolean evaluate() throws Exception {
1332           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1333           Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1334           return (rit.size() == 0);
1335         }
1336       });
1337       regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1338       assertTrue(regions.size() == 1);
1339       RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1340       Map<String, RegionState> rit = regionStates.getRegionsInTransition();
1341       assertTrue(rit.size() == 0);
1342     } finally {
1343       table.close();
1344       connection.close();
1345       TESTING_UTIL.deleteTable(tableName);
1346     }
1347   }
1348 
1349   @Test (timeout=300000)
1350   public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1351     TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1352     try {
1353       HTableDescriptor desc = new HTableDescriptor(table);
1354       desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1355       admin.createTable(desc);
1356       HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1357       for(int i = 1; i < 5; i++) {
1358         Put p1 = new Put(("r"+i).getBytes());
1359         p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1360         hTable.put(p1);
1361       }
1362       admin.flush(desc.getTableName());
1363       List<HRegion> regions = cluster.getRegions(desc.getTableName());
1364       int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1365       HRegionServer regionServer = cluster.getRegionServer(serverWith);
1366       cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1367       SplitTransactionImpl st = new SplitTransactionImpl(regions.get(0), Bytes.toBytes("r3"));
1368       st.prepare();
1369       st.stepsBeforePONR(regionServer, regionServer, false);
1370       Path tableDir =
1371           FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1372             desc.getTableName());
1373       tableDir.getFileSystem(cluster.getConfiguration());
1374       List<Path> regionDirs =
1375           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1376       assertEquals(3,regionDirs.size());
1377       cluster.startRegionServer();
1378       regionServer.kill();
1379       cluster.getRegionServerThreads().get(serverWith).join();
1380       // Wait until finish processing of shutdown
1381       while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
1382         Thread.sleep(10);
1383       }
1384       AssignmentManager am = cluster.getMaster().getAssignmentManager();
1385       while(am.getRegionStates().isRegionsInTransition()) {
1386         Thread.sleep(10);
1387       }
1388       assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
1389           .getRegionStates().getRegionsInTransition().size());
1390       regionDirs =
1391           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1392       assertEquals(1,regionDirs.size());
1393     } finally {
1394       TESTING_UTIL.deleteTable(table);
1395     }
1396   }
1397 
1398     public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
1399 
1400         public void initialize(Server server, HRegion region) {
1401           this.server = server;
1402           this.watcher = server.getZooKeeper();
1403           splitTransactionCoordination = new MockedSplitTransactionCoordination(this, watcher, region);
1404           closeRegionCoordination = new ZkCloseRegionCoordination(this, watcher);
1405           openRegionCoordination = new ZkOpenRegionCoordination(this, watcher);
1406         }
1407       }
1408 
1409       public static class MockedSplitTransaction extends SplitTransactionImpl {
1410 
1411         private HRegion currentRegion;
1412         public MockedSplitTransaction(HRegion region, byte[] splitrow) {
1413           super(region, splitrow);
1414           this.currentRegion = region;
1415         }
1416         @Override
1417         public boolean rollback(Server server, RegionServerServices services) throws IOException {
1418           if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1419               .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1420             if(secondSplit){
1421               super.rollback(server, services);
1422               latch.countDown();
1423               return true;
1424             }
1425           }
1426           return super.rollback(server, services);
1427         }
1428 
1429 
1430       }
1431 
1432   public static class MockedSplitTransactionCoordination extends ZKSplitTransactionCoordination {
1433 
1434     private HRegion currentRegion;
1435 
1436     public MockedSplitTransactionCoordination(CoordinatedStateManager coordinationProvider,
1437         ZooKeeperWatcher watcher, HRegion region) {
1438       super(coordinationProvider, watcher);
1439       currentRegion = region;
1440     }
1441 
1442     @Override
1443     public void completeSplitTransaction(RegionServerServices services, Region a, Region b,
1444         SplitTransactionDetails std, Region parent) throws IOException {
1445       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1446           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1447         try {
1448           if (!secondSplit){
1449             callRollBack = true;
1450             latch.await();
1451           }
1452         } catch (InterruptedException e) {
1453         }
1454 
1455       }
1456       super.completeSplitTransaction(services, a, b, std, parent);
1457       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1458           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1459         firstSplitCompleted = true;
1460       }
1461     }
1462   }
1463 
1464   private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1465     for (int i = 0; i < 5; ++i) {
1466       for (HRegion r: regions) {
1467         if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
1468           return(r);
1469         }
1470       }
1471       Thread.sleep(100);
1472     }
1473     return(null);
1474   }
1475 
1476   private List<HRegion> checkAndGetDaughters(TableName tableName)
1477       throws InterruptedException {
1478     List<HRegion> daughters = null;
1479     // try up to 10s
1480     for (int i=0; i<100; i++) {
1481       daughters = cluster.getRegions(tableName);
1482       if (daughters.size() >= 2) break;
1483       Thread.sleep(100);
1484     }
1485     assertTrue(daughters.size() >= 2);
1486     return daughters;
1487   }
1488 
1489   private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1490   throws IOException, InterruptedException {
1491     cluster.abortMaster(0);
1492     cluster.waitOnMaster(0);
1493     cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1494         MockMasterWithoutCatalogJanitor.class, HMaster.class);
1495     MockMasterWithoutCatalogJanitor master = null;
1496     master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1497     cluster.waitForActiveAndReadyMaster();
1498     return master;
1499   }
1500 
1501   private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
1502       throws IOException, InterruptedException {
1503     this.admin.split(hri.getRegionNameAsString());
1504     try {
1505       for (int i = 0; ProtobufUtil.getOnlineRegions(
1506           server.getRSRpcServices()).size() <= regionCount && i < 300; i++) {
1507         LOG.debug("Waiting on region to split");
1508         Thread.sleep(100);
1509       }
1510 
1511       assertFalse("Waited too long for split",
1512         ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size() <= regionCount);
1513     } catch (RegionServerStoppedException e) {
1514       if (useZKForAssignment) {
1515         // If not using ZK for assignment, the exception may be expected.
1516         LOG.error(e);
1517         throw e;
1518       }
1519     }
1520   }
1521 
1522   /**
1523    * Ensure single table region is not on same server as the single hbase:meta table
1524    * region.
1525    * @param admin
1526    * @param hri
1527    * @return Index of the server hosting the single table region
1528    * @throws UnknownRegionException
1529    * @throws MasterNotRunningException
1530    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
1531    * @throws InterruptedException
1532    */
1533   private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin,
1534       final HRegionInfo hri)
1535   throws IOException, MasterNotRunningException,
1536   ZooKeeperConnectionException, InterruptedException {
1537     // Now make sure that the table region is not on same server as that hosting
1538     // hbase:meta  We don't want hbase:meta replay polluting our test when we later crash
1539     // the table region serving server.
1540     int metaServerIndex = cluster.getServerWithMeta();
1541     assertTrue(metaServerIndex != -1);
1542     HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1543     int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1544     assertTrue(tableRegionIndex != -1);
1545     HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1546     if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1547       HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1548       assertNotNull(hrs);
1549       assertNotNull(hri);
1550       LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1551         metaRegionServer.getServerName() + " to " +
1552         hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1553       admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1554     }
1555     // Wait till table region is up on the server that is NOT carrying hbase:meta.
1556     for (int i = 0; i < 20; i++) {
1557       tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1558       if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1559       LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1560         tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1561       Thread.sleep(1000);
1562     }
1563     assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1564         && tableRegionIndex != metaServerIndex);
1565     // Verify for sure table region is not on same server as hbase:meta
1566     tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1567     assertTrue(tableRegionIndex != -1);
1568     assertNotSame(metaServerIndex, tableRegionIndex);
1569     return tableRegionIndex;
1570   }
1571 
1572   /**
1573    * Find regionserver other than the one passed.
1574    * Can't rely on indexes into list of regionservers since crashed servers
1575    * occupy an index.
1576    * @param cluster
1577    * @param notThisOne
1578    * @return A regionserver that is not <code>notThisOne</code> or null if none
1579    * found
1580    */
1581   private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1582       final HRegionServer notThisOne) {
1583     for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1584       HRegionServer hrs = rst.getRegionServer();
1585       if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1586       if (hrs.isStopping() || hrs.isStopped()) continue;
1587       return hrs;
1588     }
1589     return null;
1590   }
1591 
1592   private void printOutRegions(final HRegionServer hrs, final String prefix)
1593       throws IOException {
1594     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1595     for (HRegionInfo region: regions) {
1596       LOG.info(prefix + region.getRegionNameAsString());
1597     }
1598   }
1599 
1600   private void waitUntilRegionServerDead() throws InterruptedException, InterruptedIOException {
1601     // Wait until the master processes the RS shutdown
1602     for (int i=0; cluster.getMaster().getClusterStatus().
1603         getServers().size() > NB_SERVERS && i<100; i++) {
1604       LOG.info("Waiting on server to go down");
1605       Thread.sleep(100);
1606     }
1607     assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1608         getServers().size() > NB_SERVERS);
1609   }
1610 
1611   private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1612     // Wait till regions are back on line again.
1613     for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1614       LOG.info("Waiting for repair to happen");
1615       Thread.sleep(1000);
1616     }
1617     if (cluster.getRegions(tableName).size() < numDaughters) {
1618       fail("Waiting too long for daughter regions");
1619     }
1620   }
1621 
1622   private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1623     List<HRegion> regions = null;
1624     for (int i = 0; i < 100; i++) {
1625       regions = cluster.getRegions(tableName);
1626       if (regions.size() > 0) break;
1627       Thread.sleep(100);
1628     }
1629     return regions;
1630   }
1631 
1632   private HTable createTableAndWait(TableName tableName, byte[] cf) throws IOException,
1633       InterruptedException {
1634     HTable t = TESTING_UTIL.createTable(tableName, cf);
1635     awaitTableRegions(tableName);
1636     assertTrue("Table not online: " + tableName,
1637       cluster.getRegions(tableName).size() != 0);
1638     return t;
1639   }
1640 
1641   public static class MockMasterWithoutCatalogJanitor extends HMaster {
1642 
1643     public MockMasterWithoutCatalogJanitor(Configuration conf, CoordinatedStateManager cp)
1644       throws IOException, KeeperException,
1645         InterruptedException {
1646       super(conf, cp);
1647     }
1648   }
1649 
1650   private static class SplittingNodeCreationFailedException  extends IOException {
1651     private static final long serialVersionUID = 1652404976265623004L;
1652 
1653     public SplittingNodeCreationFailedException () {
1654       super();
1655     }
1656   }
1657 
1658   public static class MockedRegionObserver extends BaseRegionObserver {
1659     private SplitTransactionImpl st = null;
1660     private PairOfSameType<Region> daughterRegions = null;
1661 
1662     @Override
1663     public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
1664         byte[] splitKey, List<Mutation> metaEntries) throws IOException {
1665       RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1666       HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1667       List<Region> onlineRegions =
1668           rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
1669       Region region = onlineRegions.get(0);
1670       for (Region r : onlineRegions) {
1671         if (r.getRegionInfo().containsRow(splitKey)) {
1672           region = r;
1673           break;
1674         }
1675       }
1676       st = new SplitTransactionImpl((HRegion) region, splitKey);
1677       if (!st.prepare()) {
1678         LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
1679             + " failed. So returning null. ");
1680         ctx.bypass();
1681         return;
1682       }
1683       ((HRegion)region).forceSplit(splitKey);
1684       daughterRegions = st.stepsBeforePONR(rs, rs, false);
1685       HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
1686       copyOfParent.setOffline(true);
1687       copyOfParent.setSplit(true);
1688       // Put for parent
1689       Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
1690       MetaTableAccessor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
1691         daughterRegions.getSecond().getRegionInfo());
1692       metaEntries.add(putParent);
1693       // Puts for daughters
1694       Put putA = MetaTableAccessor.makePutFromRegionInfo(
1695         daughterRegions.getFirst().getRegionInfo());
1696       Put putB = MetaTableAccessor.makePutFromRegionInfo(
1697         daughterRegions.getSecond().getRegionInfo());
1698       st.addLocation(putA, rs.getServerName(), 1);
1699       st.addLocation(putB, rs.getServerName(), 1);
1700       metaEntries.add(putA);
1701       metaEntries.add(putB);
1702     }
1703 
1704     @Override
1705     public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
1706         throws IOException {
1707       RegionCoprocessorEnvironment environment = ctx.getEnvironment();
1708       HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
1709       st.stepsAfterPONR(rs, rs, daughterRegions, null);
1710     }
1711 
1712   }
1713 
1714   static class CustomSplitPolicy extends RegionSplitPolicy {
1715 
1716     @Override
1717     protected boolean shouldSplit() {
1718       return true;
1719     }
1720 
1721     @Override
1722     public boolean skipStoreFileRangeCheck(String familyName) {
1723       if(familyName.startsWith("i_")) {
1724         return true;
1725       } else {
1726         return false;
1727       }
1728     }
1729   }
1730 }
1731