1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.zookeeper;
19
20 import com.google.protobuf.InvalidProtocolBufferException;
21
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.HRegionInfo;
28 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
29 import org.apache.hadoop.hbase.ServerName;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.hbase.client.ClusterConnection;
32 import org.apache.hadoop.hbase.client.Connection;
33 import org.apache.hadoop.hbase.client.HConnection;
34 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
35 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
36 import org.apache.hadoop.hbase.exceptions.DeserializationException;
37 import org.apache.hadoop.hbase.ServerName;
38 import org.apache.hadoop.hbase.ipc.FailedServerException;
39 import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
40 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
41 import org.apache.hadoop.hbase.master.RegionState;
42 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
43 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
44 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
45 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
46 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
47 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
48 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.hbase.util.Pair;
51 import org.apache.hadoop.ipc.RemoteException;
52 import org.apache.zookeeper.KeeperException;
53
54 import java.io.EOFException;
55 import java.io.IOException;
56 import java.net.ConnectException;
57 import java.net.NoRouteToHostException;
58 import java.net.SocketException;
59 import java.net.SocketTimeoutException;
60 import java.rmi.UnknownHostException;
61
62 import java.util.List;
63 import java.util.ArrayList;
64
65 import javax.annotation.Nullable;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 @InterfaceAudience.Private
83 public class MetaTableLocator {
84 private static final Log LOG = LogFactory.getLog(MetaTableLocator.class);
85
86
87 private volatile boolean stopped = false;
88
89
90
91
92
93 public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
94 return getMetaRegionLocation(zkw) != null;
95 }
96
97
98
99
100
101 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) {
102 return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
103 }
104
105
106
107
108
109
110
111 public List<Pair<HRegionInfo, ServerName>> getMetaRegionsAndLocations(ZooKeeperWatcher zkw,
112 int replicaId) {
113 ServerName serverName = getMetaRegionLocation(zkw, replicaId);
114 List<Pair<HRegionInfo, ServerName>> list = new ArrayList<Pair<HRegionInfo, ServerName>>();
115 list.add(new Pair<HRegionInfo, ServerName>(RegionReplicaUtil.getRegionInfoForReplica(
116 HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName));
117 return list;
118 }
119
120
121
122
123
124 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw) {
125 return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
126 }
127
128
129
130
131
132
133
134 public List<HRegionInfo> getMetaRegions(ZooKeeperWatcher zkw, int replicaId) {
135 List<Pair<HRegionInfo, ServerName>> result;
136 result = getMetaRegionsAndLocations(zkw, replicaId);
137 return getListOfHRegionInfos(result);
138 }
139
140 private List<HRegionInfo> getListOfHRegionInfos(
141 final List<Pair<HRegionInfo, ServerName>> pairs) {
142 if (pairs == null || pairs.isEmpty()) return null;
143 List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
144 for (Pair<HRegionInfo, ServerName> pair: pairs) {
145 result.add(pair.getFirst());
146 }
147 return result;
148 }
149
150
151
152
153
154
155 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
156 try {
157 RegionState state = getMetaRegionState(zkw);
158 return state.isOpened() ? state.getServerName() : null;
159 } catch (KeeperException ke) {
160 return null;
161 }
162 }
163
164
165
166
167
168
169
170 public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) {
171 try {
172 RegionState state = getMetaRegionState(zkw, replicaId);
173 return state.isOpened() ? state.getServerName() : null;
174 } catch (KeeperException ke) {
175 return null;
176 }
177 }
178
179
180
181
182
183
184
185
186
187
188
189
190
191 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout)
192 throws InterruptedException, NotAllMetaRegionsOnlineException {
193 return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout)
210 throws InterruptedException, NotAllMetaRegionsOnlineException {
211 try {
212 if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) {
213 String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
214 + "There could be a mismatch with the one configured in the master.";
215 LOG.error(errorMsg);
216 throw new IllegalArgumentException(errorMsg);
217 }
218 } catch (KeeperException e) {
219 throw new IllegalStateException("KeeperException while trying to check baseZNode:", e);
220 }
221 ServerName sn = blockUntilAvailable(zkw, replicaId, timeout);
222
223 if (sn == null) {
224 throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms");
225 }
226
227 return sn;
228 }
229
230
231
232
233
234
235
236
237 public void waitMetaRegionLocation(ZooKeeperWatcher zkw) throws InterruptedException {
238 long startTime = System.currentTimeMillis();
239 while (!stopped) {
240 try {
241 if (waitMetaRegionLocation(zkw, 100) != null) break;
242 long sleepTime = System.currentTimeMillis() - startTime;
243
244 if ((sleepTime + 1) % 10000 == 0) {
245 LOG.warn("Have been waiting for meta to be assigned for " + sleepTime + "ms");
246 }
247 } catch (NotAllMetaRegionsOnlineException e) {
248 if (LOG.isTraceEnabled()) {
249 LOG.trace("hbase:meta still not available, sleeping and retrying." +
250 " Reason: " + e.getMessage());
251 }
252 }
253 }
254 }
255
256
257
258
259
260
261
262
263
264
265
266 public boolean verifyMetaRegionLocation(HConnection hConnection,
267 ZooKeeperWatcher zkw, final long timeout)
268 throws InterruptedException, IOException {
269 return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID);
270 }
271
272
273
274
275
276
277
278
279
280
281
282 public boolean verifyMetaRegionLocation(HConnection hConnection,
283 ZooKeeperWatcher zkw, final long timeout, int replicaId)
284 throws InterruptedException, IOException {
285 AdminProtos.AdminService.BlockingInterface service = null;
286 try {
287 service = getMetaServerConnection(hConnection, zkw, timeout, replicaId);
288 } catch (NotAllMetaRegionsOnlineException e) {
289
290 } catch (ServerNotRunningYetException e) {
291
292 } catch (UnknownHostException e) {
293
294 } catch (RegionServerStoppedException e) {
295
296 }
297 return (service != null) && verifyRegionLocation(hConnection, service,
298 getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica(
299 HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName());
300 }
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317 private boolean verifyRegionLocation(final Connection connection,
318 AdminService.BlockingInterface hostingServer, final ServerName address,
319 final byte [] regionName)
320 throws IOException {
321 if (hostingServer == null) {
322 LOG.info("Passed hostingServer is null");
323 return false;
324 }
325 Throwable t;
326 PayloadCarryingRpcController controller = null;
327 if (connection instanceof ClusterConnection) {
328 controller = ((ClusterConnection) connection).getRpcControllerFactory().newController();
329 }
330 try {
331
332 return ProtobufUtil.getRegionInfo(controller, hostingServer, regionName) != null;
333 } catch (ConnectException e) {
334 t = e;
335 } catch (RetriesExhaustedException e) {
336 t = e;
337 } catch (RemoteException e) {
338 IOException ioe = e.unwrapRemoteException();
339 t = ioe;
340 } catch (IOException e) {
341 Throwable cause = e.getCause();
342 if (cause != null && cause instanceof EOFException) {
343 t = cause;
344 } else if (cause != null && cause.getMessage() != null
345 && cause.getMessage().contains("Connection reset")) {
346 t = cause;
347 } else {
348 t = e;
349 }
350 }
351 LOG.info("Failed verification of " + Bytes.toStringBinary(regionName) +
352 " at address=" + address + ", exception=" + t.getMessage());
353 return false;
354 }
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369 private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection,
370 ZooKeeperWatcher zkw, long timeout, int replicaId)
371 throws InterruptedException, NotAllMetaRegionsOnlineException, IOException {
372 return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout));
373 }
374
375
376
377
378
379
380
381
382 @SuppressWarnings("deprecation")
383 private static AdminService.BlockingInterface getCachedConnection(HConnection hConnection,
384 ServerName sn)
385 throws IOException {
386 if (sn == null) {
387 return null;
388 }
389 AdminService.BlockingInterface service = null;
390 try {
391 service = hConnection.getAdmin(sn);
392 } catch (RetriesExhaustedException e) {
393 if (e.getCause() != null && e.getCause() instanceof ConnectException) {
394
395 } else {
396 throw e;
397 }
398 } catch (SocketTimeoutException e) {
399 LOG.debug("Timed out connecting to " + sn);
400 } catch (NoRouteToHostException e) {
401 LOG.debug("Connecting to " + sn, e);
402 } catch (SocketException e) {
403 LOG.debug("Exception connecting to " + sn);
404 } catch (UnknownHostException e) {
405 LOG.debug("Unknown host exception connecting to " + sn);
406 } catch (FailedServerException e) {
407 if (LOG.isDebugEnabled()) {
408 LOG.debug("Server " + sn + " is in failed server list.");
409 }
410 } catch (IOException ioe) {
411 Throwable cause = ioe.getCause();
412 if (ioe instanceof ConnectException) {
413
414 } else if (cause != null && cause instanceof EOFException) {
415
416 } else if (cause != null && cause.getMessage() != null &&
417 cause.getMessage().toLowerCase().contains("connection reset")) {
418
419 } else {
420 throw ioe;
421 }
422
423 }
424 return service;
425 }
426
427
428
429
430
431
432
433
434
435 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
436 ServerName serverName, RegionState.State state) throws KeeperException {
437 setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state);
438 }
439
440
441
442
443
444
445
446
447
448
449 public static void setMetaLocation(ZooKeeperWatcher zookeeper,
450 ServerName serverName, int replicaId, RegionState.State state) throws KeeperException {
451 LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
452
453
454 MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
455 .setServer(ProtobufUtil.toServerName(serverName))
456 .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
457 .setState(state.convert()).build();
458 byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
459 try {
460 ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
461 } catch(KeeperException.NoNodeException nne) {
462 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
463 LOG.debug("META region location doesn't exist, create it");
464 } else {
465 LOG.debug("META region location doesn't exist for replicaId " + replicaId +
466 ", create it");
467 }
468 ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data);
469 }
470 }
471
472
473
474
475 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
476 return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID);
477 }
478
479
480
481
482
483
484
485
486 public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId)
487 throws KeeperException {
488 RegionState.State state = RegionState.State.OPEN;
489 ServerName serverName = null;
490 try {
491 byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId));
492 if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
493 try {
494 int prefixLen = ProtobufUtil.lengthOfPBMagic();
495 ZooKeeperProtos.MetaRegionServer rl =
496 ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
497 (data, prefixLen, data.length - prefixLen);
498 if (rl.hasState()) {
499 state = RegionState.State.convert(rl.getState());
500 }
501 HBaseProtos.ServerName sn = rl.getServer();
502 serverName = ServerName.valueOf(
503 sn.getHostName(), sn.getPort(), sn.getStartCode());
504 } catch (InvalidProtocolBufferException e) {
505 throw new DeserializationException("Unable to parse meta region location");
506 }
507 } else {
508
509 serverName = ServerName.parseFrom(data);
510 }
511 } catch (DeserializationException e) {
512 throw ZKUtil.convert(e);
513 } catch (InterruptedException e) {
514 Thread.currentThread().interrupt();
515 }
516 if (serverName == null) {
517 state = RegionState.State.OFFLINE;
518 }
519 return new RegionState(
520 RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId),
521 state, serverName);
522 }
523
524
525
526
527
528
529 public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
530 throws KeeperException {
531 deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID);
532 }
533
534 public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId)
535 throws KeeperException {
536 if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
537 LOG.info("Deleting hbase:meta region location in ZooKeeper");
538 } else {
539 LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper");
540 }
541 try {
542
543 ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId));
544 } catch(KeeperException.NoNodeException nne) {
545
546 }
547 }
548
549
550
551
552
553
554
555
556
557 public List<ServerName> blockUntilAvailable(final ZooKeeperWatcher zkw,
558 final long timeout, Configuration conf)
559 throws InterruptedException {
560 int numReplicasConfigured = 1;
561
562 List<ServerName> servers = new ArrayList<ServerName>();
563
564
565 ServerName server = blockUntilAvailable(zkw, timeout);
566 if (server == null) return null;
567 servers.add(server);
568
569 try {
570 List<String> metaReplicaNodes = zkw.getMetaReplicaNodes();
571 numReplicasConfigured = metaReplicaNodes.size();
572 } catch (KeeperException e) {
573 LOG.warn("Got ZK exception " + e);
574 }
575 for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) {
576
577 servers.add(getMetaRegionLocation(zkw, replicaId));
578 }
579 return servers;
580 }
581
582
583
584
585
586
587
588
589 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
590 final long timeout)
591 throws InterruptedException {
592 return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout);
593 }
594
595
596
597
598
599
600
601
602
603 public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId,
604 final long timeout)
605 throws InterruptedException {
606 if (timeout < 0) throw new IllegalArgumentException();
607 if (zkw == null) throw new IllegalArgumentException();
608 long startTime = System.currentTimeMillis();
609 ServerName sn = null;
610 while (true) {
611 sn = getMetaRegionLocation(zkw, replicaId);
612 if (sn != null || (System.currentTimeMillis() - startTime)
613 > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
614 break;
615 }
616 Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
617 }
618 return sn;
619 }
620
621
622
623
624
625 public void stop() {
626 if (!stopped) {
627 LOG.debug("Stopping MetaTableLocator");
628 stopped = true;
629 }
630 }
631 }