View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.balancer;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.HBaseIOException;
32  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.NamespaceDescriptor;
35  import org.apache.hadoop.hbase.ServerLoad;
36  import org.apache.hadoop.hbase.ServerName;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.master.RackManager;
39  import org.apache.hadoop.hbase.master.RegionPlan;
40  import org.apache.hadoop.hbase.master.ServerManager;
41  import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta;
42  import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position;
43  import org.apache.hadoop.hbase.util.Pair;
44  
45  /**
46   * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} 
47   * that assigns favored nodes for each region. There is a Primary RegionServer 
48   * that hosts the region, and then there is Secondary and Tertiary RegionServers. 
49   * Currently, the favored nodes information is used in creating HDFS files - the Primary 
50   * RegionServer passes the primary, secondary, tertiary node addresses as hints to the 
51   * DistributedFileSystem API for creating files on the filesystem. These nodes are treated 
52   * as hints by the HDFS to place the blocks of the file. This alleviates the problem to 
53   * do with reading from remote nodes (since we can make the Secondary RegionServer as the 
54   * new Primary RegionServer) after a region is recovered. This should help provide 
55   * consistent read latencies for the regions even when their primary region servers die.
56   *
57   */
58  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
59  public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
60    private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
61  
62    private FavoredNodesPlan globalFavoredNodesAssignmentPlan;
63    private RackManager rackManager;
64  
65    @Override
66    public void setConf(Configuration conf) {
67      super.setConf(conf);
68      globalFavoredNodesAssignmentPlan = new FavoredNodesPlan();
69      this.rackManager = new RackManager(conf);
70      super.setConf(conf);
71    }
72  
73    @Override
74    public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState)  {
75      //TODO. Look at is whether Stochastic loadbalancer can be integrated with this
76      List<RegionPlan> plans = new ArrayList<RegionPlan>();
77      //perform a scan of the meta to get the latest updates (if any)
78      SnapshotOfRegionAssignmentFromMeta snaphotOfRegionAssignment =
79          new SnapshotOfRegionAssignmentFromMeta(super.services.getConnection());
80      try {
81        snaphotOfRegionAssignment.initialize();
82      } catch (IOException ie) {
83        LOG.warn("Not running balancer since exception was thrown " + ie);
84        return plans;
85      }
86      globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan();
87      Map<ServerName, ServerName> serverNameToServerNameWithoutCode =
88          new HashMap<ServerName, ServerName>();
89      Map<ServerName, ServerName> serverNameWithoutCodeToServerName =
90          new HashMap<ServerName, ServerName>();
91      ServerManager serverMgr = super.services.getServerManager();
92      for (ServerName sn: serverMgr.getOnlineServersList()) {
93        ServerName s = ServerName.valueOf(sn.getHostname(), sn.getPort(), ServerName.NON_STARTCODE);
94        serverNameToServerNameWithoutCode.put(sn, s);
95        serverNameWithoutCodeToServerName.put(s, sn);
96      }
97      for (Map.Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
98        ServerName currentServer = entry.getKey();
99        //get a server without the startcode for the currentServer
100       ServerName currentServerWithoutStartCode = ServerName.valueOf(currentServer.getHostname(),
101           currentServer.getPort(), ServerName.NON_STARTCODE);
102       List<HRegionInfo> list = entry.getValue();
103       for (HRegionInfo region : list) {
104         if(region.getTable().getNamespaceAsString()
105             .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
106           continue;
107         }
108         List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
109         if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) {
110           continue; //either favorednodes does not exist or we are already on the primary node
111         }
112         ServerName destination = null;
113         //check whether the primary is available
114         destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(0));
115         if (destination == null) {
116           //check whether the region is on secondary/tertiary
117           if (currentServerWithoutStartCode.equals(favoredNodes.get(1)) ||
118               currentServerWithoutStartCode.equals(favoredNodes.get(2))) {
119             continue;
120           }
121           //the region is currently on none of the favored nodes
122           //get it on one of them if possible
123           ServerLoad l1 = super.services.getServerManager().getLoad(
124               serverNameWithoutCodeToServerName.get(favoredNodes.get(1)));
125           ServerLoad l2 = super.services.getServerManager().getLoad(
126               serverNameWithoutCodeToServerName.get(favoredNodes.get(2)));
127           if (l1 != null && l2 != null) {
128             if (l1.getLoad() > l2.getLoad()) {
129               destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
130             } else {
131               destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
132             }
133           } else if (l1 != null) {
134             destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(1));
135           } else if (l2 != null) {
136             destination = serverNameWithoutCodeToServerName.get(favoredNodes.get(2));
137           }
138         }
139 
140         if (destination != null) {
141           RegionPlan plan = new RegionPlan(region, currentServer, destination);
142           plans.add(plan);
143         }
144       }
145     }
146     return plans;
147   }
148 
149   @Override
150   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
151       List<ServerName> servers) {
152     Map<ServerName, List<HRegionInfo>> assignmentMap;
153     try {
154       FavoredNodeAssignmentHelper assignmentHelper =
155           new FavoredNodeAssignmentHelper(servers, rackManager);
156       assignmentHelper.initialize();
157       if (!assignmentHelper.canPlaceFavoredNodes()) {
158         return super.roundRobinAssignment(regions, servers);
159       }
160       // Segregate the regions into two types:
161       // 1. The regions that have favored node assignment, and where at least
162       //    one of the favored node is still alive. In this case, try to adhere
163       //    to the current favored nodes assignment as much as possible - i.e.,
164       //    if the current primary is gone, then make the secondary or tertiary
165       //    as the new host for the region (based on their current load).
166       //    Note that we don't change the favored
167       //    node assignments here (even though one or more favored node is currently
168       //    down). It is up to the balanceCluster to do this hard work. The HDFS
169       //    can handle the fact that some nodes in the favored nodes hint is down
170       //    It'd allocate some other DNs. In combination with stale settings for HDFS,
171       //    we should be just fine.
172       // 2. The regions that currently don't have favored node assignment. We will
173       //    need to come up with favored nodes assignments for them. The corner case
174       //    in (1) above is that all the nodes are unavailable and in that case, we
175       //    will note that this region doesn't have favored nodes.
176       Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>> segregatedRegions =
177           segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
178       Map<ServerName,List<HRegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
179       List<HRegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
180       assignmentMap = new HashMap<ServerName, List<HRegionInfo>>();
181       roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes,
182           servers);
183       // merge the assignment maps
184       assignmentMap.putAll(regionsWithFavoredNodesMap);
185     } catch (Exception ex) {
186       LOG.warn("Encountered exception while doing favored-nodes assignment " + ex +
187           " Falling back to regular assignment");
188       assignmentMap = super.roundRobinAssignment(regions, servers);
189     }
190     return assignmentMap;
191   }
192 
193   @Override
194   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
195     try {
196       FavoredNodeAssignmentHelper assignmentHelper =
197           new FavoredNodeAssignmentHelper(servers, rackManager);
198       assignmentHelper.initialize();
199       ServerName primary = super.randomAssignment(regionInfo, servers);
200       if (!assignmentHelper.canPlaceFavoredNodes()) {
201         return primary;
202       }
203       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
204       // check if we have a favored nodes mapping for this region and if so, return
205       // a server from the favored nodes list if the passed 'servers' contains this
206       // server as well (available servers, that is)
207       if (favoredNodes != null) {
208         for (ServerName s : favoredNodes) {
209           ServerName serverWithLegitStartCode = availableServersContains(servers, s);
210           if (serverWithLegitStartCode != null) {
211             return serverWithLegitStartCode;
212           }
213         }
214       }
215       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
216       regions.add(regionInfo);
217       Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1);
218       primaryRSMap.put(regionInfo, primary);
219       assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
220       return primary;
221     } catch (Exception ex) {
222       LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex +
223           " Falling back to regular assignment");
224       return super.randomAssignment(regionInfo, servers);
225     }
226   }
227 
228   private Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>
229   segregateRegionsAndAssignRegionsWithFavoredNodes(List<HRegionInfo> regions,
230       List<ServerName> availableServers) {
231     Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes =
232         new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2);
233     List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2);
234     for (HRegionInfo region : regions) {
235       List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
236       ServerName primaryHost = null;
237       ServerName secondaryHost = null;
238       ServerName tertiaryHost = null;
239       if (favoredNodes != null) {
240         for (ServerName s : favoredNodes) {
241           ServerName serverWithLegitStartCode = availableServersContains(availableServers, s);
242           if (serverWithLegitStartCode != null) {
243             FavoredNodesPlan.Position position =
244                 FavoredNodesPlan.getFavoredServerPosition(favoredNodes, s);
245             if (Position.PRIMARY.equals(position)) {
246               primaryHost = serverWithLegitStartCode;
247             } else if (Position.SECONDARY.equals(position)) {
248               secondaryHost = serverWithLegitStartCode;
249             } else if (Position.TERTIARY.equals(position)) {
250               tertiaryHost = serverWithLegitStartCode;
251             }
252           }
253         }
254         assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region,
255               primaryHost, secondaryHost, tertiaryHost);
256       }
257       if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
258         //all favored nodes unavailable
259         regionsWithNoFavoredNodes.add(region);
260       }
261     }
262     return new Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>(
263         assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
264   }
265 
266   // Do a check of the hostname and port and return the servername from the servers list
267   // that matched (the favoredNode will have a startcode of -1 but we want the real
268   // server with the legit startcode
269   private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
270     for (ServerName server : servers) {
271       if (ServerName.isSameHostnameAndPort(favoredNode, server)) {
272         return server;
273       }
274     }
275     return null;
276   }
277 
278   private void assignRegionToAvailableFavoredNode(Map<ServerName,
279       List<HRegionInfo>> assignmentMapForFavoredNodes, HRegionInfo region, ServerName primaryHost,
280       ServerName secondaryHost, ServerName tertiaryHost) {
281     if (primaryHost != null) {
282       addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
283     } else if (secondaryHost != null && tertiaryHost != null) {
284       // assign the region to the one with a lower load
285       // (both have the desired hdfs blocks)
286       ServerName s;
287       ServerLoad tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost);
288       ServerLoad secondaryLoad = super.services.getServerManager().getLoad(secondaryHost);
289       if (secondaryLoad.getLoad() < tertiaryLoad.getLoad()) {
290         s = secondaryHost;
291       } else {
292         s = tertiaryHost;
293       }
294       addRegionToMap(assignmentMapForFavoredNodes, region, s);
295     } else if (secondaryHost != null) {
296       addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
297     } else if (tertiaryHost != null) {
298       addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
299     }
300   }
301 
302   private void addRegionToMap(Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes,
303       HRegionInfo region, ServerName host) {
304     List<HRegionInfo> regionsOnServer = null;
305     if ((regionsOnServer = assignmentMapForFavoredNodes.get(host)) == null) {
306       regionsOnServer = new ArrayList<HRegionInfo>();
307       assignmentMapForFavoredNodes.put(host, regionsOnServer);
308     }
309     regionsOnServer.add(region);
310   }
311 
312   public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
313     return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
314   }
315 
316   private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
317       Map<ServerName, List<HRegionInfo>> assignmentMap,
318       List<HRegionInfo> regions, List<ServerName> servers) {
319     Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
320     // figure the primary RSs
321     assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
322     assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
323   }
324 
325   private void assignSecondaryAndTertiaryNodesForRegion(
326       FavoredNodeAssignmentHelper assignmentHelper,
327       List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) {
328     // figure the secondary and tertiary RSs
329     Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
330         assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
331     // now record all the assignments so that we can serve queries later
332     for (HRegionInfo region : regions) {
333       // Store the favored nodes without startCode for the ServerName objects
334       // We don't care about the startcode; but only the hostname really
335       List<ServerName> favoredNodesForRegion = new ArrayList<ServerName>(3);
336       ServerName sn = primaryRSMap.get(region);
337       favoredNodesForRegion.add(ServerName.valueOf(sn.getHostname(), sn.getPort(),
338           ServerName.NON_STARTCODE));
339       ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
340       if (secondaryAndTertiaryNodes != null) {
341         favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[0].getHostname(),
342             secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE));
343         favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
344             secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
345       }
346       globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion);
347     }
348   }
349 
350   @Override
351   public List<RegionPlan> balanceCluster(TableName tableName,
352       Map<ServerName, List<HRegionInfo>> clusterState) throws HBaseIOException {
353     return balanceCluster(clusterState);
354   }
355 }