View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.HBaseTestingUtility;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.HTableDescriptor;
28  import org.apache.hadoop.hbase.testclassification.MediumTests;
29  import org.apache.hadoop.hbase.NotServingRegionException;
30  import org.apache.hadoop.hbase.ServerName;
31  import org.apache.hadoop.hbase.MetaTableAccessor;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.client.HTable;
34  import org.apache.hadoop.hbase.client.Put;
35  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
36  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
37  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
38  import org.apache.hadoop.hbase.executor.EventType;
39  import org.apache.hadoop.hbase.protobuf.RequestConverter;
40  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
41  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
42  import org.apache.hadoop.hbase.regionserver.handler.OpenRegionHandler;
43  import org.apache.hadoop.hbase.util.Threads;
44  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
45  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
46  import org.apache.zookeeper.KeeperException;
47  import org.apache.zookeeper.KeeperException.NodeExistsException;
48  import org.junit.After;
49  import org.junit.AfterClass;
50  import org.junit.Assert;
51  import org.junit.BeforeClass;
52  import org.junit.Test;
53  import org.junit.experimental.categories.Category;
54  import org.mortbay.log.Log;
55  
56  import com.google.protobuf.ServiceException;
57  
58  
59  /**
60   * Tests on the region server, without the master.
61   */
62  @Category(MediumTests.class)
63  public class TestRegionServerNoMaster {
64  
65    private static final int NB_SERVERS = 1;
66    private static HTable table;
67    private static final byte[] row = "ee".getBytes();
68  
69    private static HRegionInfo hri;
70  
71    private static byte[] regionName;
72    private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
73  
74  
75    @BeforeClass
76    public static void before() throws Exception {
77      HTU.getConfiguration().setBoolean("hbase.assignment.usezk", true);
78      HTU.startMiniCluster(NB_SERVERS);
79      final TableName tableName = TableName.valueOf(TestRegionServerNoMaster.class.getSimpleName());
80  
81      // Create table then get the single region for our new table.
82      table = HTU.createTable(tableName,HConstants.CATALOG_FAMILY);
83      Put p = new Put(row);
84      p.add(HConstants.CATALOG_FAMILY, row, row);
85      table.put(p);
86  
87      hri = table.getRegionLocation(row, false).getRegionInfo();
88      regionName = hri.getRegionName();
89  
90      stopMasterAndAssignMeta(HTU);
91    }
92  
93    public static void stopMasterAndAssignMeta(HBaseTestingUtility HTU)
94        throws NodeExistsException, KeeperException, IOException, InterruptedException {
95      // No master
96      HTU.getHBaseCluster().getMaster().stopMaster();
97  
98      Log.info("Waiting until master thread exits");
99      while (HTU.getHBaseCluster().getMasterThread() != null
100         && HTU.getHBaseCluster().getMasterThread().isAlive()) {
101       Threads.sleep(100);
102     }
103   }
104 
105   /** Flush the given region in the mini cluster. Since no master, we cannot use HBaseAdmin.flush() */
106   public static void flushRegion(HBaseTestingUtility HTU, HRegionInfo regionInfo) throws IOException {
107     for (RegionServerThread rst : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
108       Region region = rst.getRegionServer().getRegionByEncodedName(regionInfo.getEncodedName());
109       if (region != null) {
110         region.flush(true);
111         return;
112       }
113     }
114     throw new IOException("Region to flush cannot be found");
115   }
116 
117   @AfterClass
118   public static void afterClass() throws Exception {
119     table.close();
120     HTU.shutdownMiniCluster();
121   }
122 
123   @After
124   public void after() throws Exception {
125     // Clean the state if the test failed before cleaning the znode
126     // It does not manage all bad failures, so if there are multiple failures, only
127     //  the first one should be looked at.
128     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hri);
129   }
130 
131 
132   private static HRegionServer getRS() {
133     return HTU.getHBaseCluster().getLiveRegionServerThreads().get(0).getRegionServer();
134   }
135 
136   public static void openRegion(HBaseTestingUtility HTU, HRegionServer rs, HRegionInfo hri)
137       throws Exception {
138     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, rs.getServerName());
139     // first version is '0'
140     AdminProtos.OpenRegionRequest orr =
141         RequestConverter.buildOpenRegionRequest(rs.getServerName(), hri, 0, null, null);
142     AdminProtos.OpenRegionResponse responseOpen = rs.rpcServices.openRegion(null, orr);
143 
144     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
145     Assert.assertTrue(responseOpen.getOpeningState(0).
146         equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED));
147 
148 
149     checkRegionIsOpened(HTU, rs, hri);
150   }
151 
152   public static void checkRegionIsOpened(HBaseTestingUtility HTU, HRegionServer rs,
153       HRegionInfo hri) throws Exception {
154     while (!rs.getRegionsInTransitionInRS().isEmpty()) {
155       Thread.sleep(1);
156     }
157 
158     Assert.assertTrue(rs.getRegion(hri.getRegionName()).isAvailable());
159 
160     Assert.assertTrue(
161       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
162         rs.getServerName()));
163   }
164 
165   public static void closeRegion(HBaseTestingUtility HTU, HRegionServer rs, HRegionInfo hri)
166       throws Exception {
167     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, rs.getServerName());
168     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
169       rs.getServerName(), hri.getEncodedName(), true);
170     AdminProtos.CloseRegionResponse responseClose = rs.rpcServices.closeRegion(null, crr);
171     Assert.assertTrue(responseClose.getClosed());
172     checkRegionIsClosed(HTU, rs, hri);
173     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null);
174   }
175 
176   public static void checkRegionIsClosed(HBaseTestingUtility HTU, HRegionServer rs,
177       HRegionInfo hri) throws Exception {
178     while (!rs.getRegionsInTransitionInRS().isEmpty()) {
179       Thread.sleep(1);
180     }
181 
182     boolean exception = false;
183     try {
184       while ((rs.getRegion(hri.getRegionName()).isAvailable())) {
185         Thread.sleep(10);
186       }
187     } catch (NotServingRegionException expected) {
188       exception = true;
189       // That's how it work: if the region is closed we have an exception.
190     }
191     assert(exception);
192     // We don't delete the znode here, because there is not always a znode.
193   }
194 
195   /**
196    * Close the region without using ZK
197    */
198   private void closeRegionNoZK() throws Exception {
199     // no transition in ZK
200     AdminProtos.CloseRegionRequest crr =
201         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
202     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
203     Assert.assertTrue(responseClose.getClosed());
204 
205     // now waiting & checking. After a while, the transition should be done and the region closed
206     checkRegionIsClosed(HTU, getRS(), hri);
207   }
208 
209 
210   @Test(timeout = 60000)
211   public void testCloseByRegionServer() throws Exception {
212     closeRegionNoZK();
213     openRegion(HTU, getRS(), hri);
214   }
215 
216   @Test(timeout = 60000)
217   public void testCloseByMasterWithoutZNode() throws Exception {
218 
219     // Transition in ZK on. This should fail, as there is no znode
220     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
221       getRS().getServerName(), regionName, true);
222     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
223     Assert.assertTrue(responseClose.getClosed());
224 
225     // now waiting. After a while, the transition should be done
226     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
227       Thread.sleep(1);
228     }
229 
230     // the region is still available, the close got rejected at the end
231     Assert.assertTrue("The close should have failed", getRS().getRegion(regionName).isAvailable());
232   }
233 
234   @Test(timeout = 60000)
235   public void testOpenCloseByMasterWithZNode() throws Exception {
236 
237     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
238 
239     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
240       getRS().getServerName(), regionName, true);
241     AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
242     Assert.assertTrue(responseClose.getClosed());
243 
244     checkRegionIsClosed(HTU, getRS(), hri);
245 
246     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
247       getRS().getServerName());
248 
249     openRegion(HTU, getRS(), hri);
250   }
251 
252   /**
253    * Test that we can send multiple openRegion to the region server.
254    * This is used when:
255    * - there is a SocketTimeout: in this case, the master does not know if the region server
256    * received the request before the timeout.
257    * - We have a socket error during the operation: same stuff: we don't know
258    * - a master failover: if we find a znode in thz M_ZK_REGION_OFFLINE, we don't know if
259    * the region server has received the query or not. Only solution to be efficient: re-ask
260    * immediately.
261    */
262   @Test(timeout = 60000)
263   public void testMultipleOpen() throws Exception {
264 
265     // We close
266     closeRegionNoZK();
267     checkRegionIsClosed(HTU, getRS(), hri);
268 
269     // We reopen. We need a ZK node here, as a open is always triggered by a master.
270     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
271 
272     // We're sending multiple requests in a row. The region server must handle this nicely.
273     for (int i = 0; i < 10; i++) {
274       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
275         getRS().getServerName(), hri, 0, null, null);
276       AdminProtos.OpenRegionResponse responseOpen = getRS().rpcServices.openRegion(null, orr);
277       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
278 
279       AdminProtos.OpenRegionResponse.RegionOpeningState ors = responseOpen.getOpeningState(0);
280       Assert.assertTrue("request " + i + " failed",
281           ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED) ||
282               ors.equals(AdminProtos.OpenRegionResponse.RegionOpeningState.ALREADY_OPENED)
283       );
284     }
285 
286     checkRegionIsOpened(HTU, getRS(), hri);
287   }
288 
289   @Test
290   public void testOpenClosingRegion() throws Exception {
291     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
292 
293     try {
294       // we re-opened meta so some of its data is lost
295       ServerName sn = getRS().getServerName();
296       MetaTableAccessor.updateRegionLocation(getRS().getConnection(),
297         hri, sn, getRS().getRegion(regionName).getOpenSeqNum(), -1);
298       // fake region to be closing now, need to clear state afterwards
299       getRS().regionsInTransitionInRS.put(hri.getEncodedNameAsBytes(), Boolean.FALSE);
300       AdminProtos.OpenRegionRequest orr =
301         RequestConverter.buildOpenRegionRequest(sn, hri, 0, null, null);
302       getRS().rpcServices.openRegion(null, orr);
303       Assert.fail("The closing region should not be opened");
304     } catch (ServiceException se) {
305       Assert.assertTrue("The region should be already in transition",
306         se.getCause() instanceof RegionAlreadyInTransitionException);
307     } finally {
308       getRS().regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes());
309     }
310   }
311 
312   @Test(timeout = 60000)
313   public void testMultipleCloseFromMaster() throws Exception {
314 
315     // As opening, we must support multiple requests on the same region
316     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
317     for (int i = 0; i < 10; i++) {
318       AdminProtos.CloseRegionRequest crr =
319           RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, 0, null, true);
320       try {
321         AdminProtos.CloseRegionResponse responseClose = getRS().rpcServices.closeRegion(null, crr);
322         Assert.assertEquals("The first request should succeeds", 0, i);
323         Assert.assertTrue("request " + i + " failed",
324             responseClose.getClosed() || responseClose.hasClosed());
325       } catch (ServiceException se) {
326         Assert.assertTrue("The next queries should throw an exception.", i > 0);
327       }
328     }
329 
330     checkRegionIsClosed(HTU, getRS(), hri);
331 
332     Assert.assertTrue(
333       ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(),
334         getRS().getServerName())
335     );
336 
337     openRegion(HTU, getRS(), hri);
338   }
339 
340   /**
341    * Test that if we do a close while opening it stops the opening.
342    */
343   @Test(timeout = 60000)
344   public void testCancelOpeningWithoutZK() throws Exception {
345     // We close
346     closeRegionNoZK();
347     checkRegionIsClosed(HTU, getRS(), hri);
348 
349     // Let do the initial steps, without having a handler
350     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
351     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
352 
353     // That's a close without ZK.
354     AdminProtos.CloseRegionRequest crr =
355         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
356     try {
357       getRS().rpcServices.closeRegion(null, crr);
358       Assert.assertTrue(false);
359     } catch (ServiceException expected) {
360     }
361 
362     // The state in RIT should have changed to close
363     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
364         hri.getEncodedNameAsBytes()));
365 
366     // Let's start the open handler
367     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
368 
369     BaseCoordinatedStateManager csm = new ZkCoordinatedStateManager();
370     csm.initialize(getRS());
371     csm.start();
372 
373     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
374       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
375     zkCrd.setServerName(getRS().getServerName());
376     zkCrd.setVersionOfOfflineNode(0);
377 
378     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd,
379       -1, csm.getOpenRegionCoordination(), zkCrd));
380 
381     // The open handler should have removed the region from RIT but kept the region closed
382     checkRegionIsClosed(HTU, getRS(), hri);
383 
384     // The open handler should have updated the value in ZK.
385     Assert.assertTrue(ZKAssign.deleteNode(
386         getRS().getZooKeeper(), hri.getEncodedName(),
387         EventType.RS_ZK_REGION_FAILED_OPEN, 1)
388     );
389 
390     openRegion(HTU, getRS(), hri);
391   }
392 
393   /**
394    * Test an open then a close with ZK. This is going to mess-up the ZK states, so
395    * the opening will fail as well because it doesn't find what it expects in ZK.
396    */
397   @Test(timeout = 60000)
398   public void testCancelOpeningWithZK() throws Exception {
399     // We close
400     closeRegionNoZK();
401     checkRegionIsClosed(HTU, getRS(), hri);
402 
403     // Let do the initial steps, without having a handler
404     getRS().getRegionsInTransitionInRS().put(hri.getEncodedNameAsBytes(), Boolean.TRUE);
405 
406     // That's a close without ZK.
407     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
408     AdminProtos.CloseRegionRequest crr =
409         RequestConverter.buildCloseRegionRequest(getRS().getServerName(), regionName, false);
410     try {
411       getRS().rpcServices.closeRegion(null, crr);
412       Assert.assertTrue(false);
413     } catch (ServiceException expected) {
414       Assert.assertTrue(expected.getCause() instanceof RegionAlreadyInTransitionException);
415     }
416 
417     // The close should have left the ZK state as it is: it's the job the AM to delete it
418     Assert.assertTrue(ZKAssign.deleteNode(
419         getRS().getZooKeeper(), hri.getEncodedName(),
420         EventType.M_ZK_REGION_CLOSING, 0)
421     );
422 
423     // The state in RIT should have changed to close
424     Assert.assertEquals(Boolean.FALSE, getRS().getRegionsInTransitionInRS().get(
425         hri.getEncodedNameAsBytes()));
426 
427     // Let's start the open handler
428     // It should not succeed for two reasons:
429     //  1) There is no ZK node
430     //  2) The region in RIT was changed.
431     // The order is more or less implementation dependant.
432     HTableDescriptor htd = getRS().tableDescriptors.get(hri.getTable());
433 
434     BaseCoordinatedStateManager csm = new ZkCoordinatedStateManager();
435     csm.initialize(getRS());
436     csm.start();
437 
438     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
439       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
440     zkCrd.setServerName(getRS().getServerName());
441     zkCrd.setVersionOfOfflineNode(0);
442 
443     getRS().service.submit(new OpenRegionHandler(getRS(), getRS(), hri, htd,
444       -1, csm.getOpenRegionCoordination(), zkCrd));
445 
446     // The open handler should have removed the region from RIT but kept the region closed
447     checkRegionIsClosed(HTU, getRS(), hri);
448 
449     // We should not find any znode here.
450     Assert.assertEquals(-1, ZKAssign.getVersion(HTU.getZooKeeperWatcher(), hri));
451 
452     openRegion(HTU, getRS(), hri);
453   }
454 
455   /**
456    * Tests an on-the-fly RPC that was scheduled for the earlier RS on the same port
457    * for openRegion. The region server should reject this RPC. (HBASE-9721)
458    */
459   @Test
460   public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception {
461     Assert.assertTrue(getRS().getRegion(regionName).isAvailable());
462 
463     ServerName sn = getRS().getServerName();
464     ServerName earlierServerName = ServerName.valueOf(sn.getHostname(), sn.getPort(), 1);
465 
466     try {
467       CloseRegionRequest request = RequestConverter.buildCloseRegionRequest(earlierServerName, regionName, true);
468       getRS().getRSRpcServices().closeRegion(null, request);
469       Assert.fail("The closeRegion should have been rejected");
470     } catch (ServiceException se) {
471       Assert.assertTrue(se.getCause() instanceof IOException);
472       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
473     }
474 
475     //actual close
476     closeRegionNoZK();
477     try {
478       AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
479         earlierServerName, hri, 0, null, null);
480       getRS().getRSRpcServices().openRegion(null, orr);
481       Assert.fail("The openRegion should have been rejected");
482     } catch (ServiceException se) {
483       Assert.assertTrue(se.getCause() instanceof IOException);
484       Assert.assertTrue(se.getCause().getMessage().contains("This RPC was intended for a different server"));
485     } finally {
486       openRegion(HTU, getRS(), hri);
487     }
488   }
489 }