View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.MetaTableAccessor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.TableNotDisabledException;
38  import org.apache.hadoop.hbase.TableNotFoundException;
39  import org.apache.hadoop.hbase.TableStateManager;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.hbase.executor.EventType;
42  import org.apache.hadoop.hbase.exceptions.HBaseException;
43  import org.apache.hadoop.hbase.master.AssignmentManager;
44  import org.apache.hadoop.hbase.master.BulkAssigner;
45  import org.apache.hadoop.hbase.master.GeneralBulkAssigner;
46  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
47  import org.apache.hadoop.hbase.master.MasterServices;
48  import org.apache.hadoop.hbase.master.RegionStates;
49  import org.apache.hadoop.hbase.master.ServerManager;
50  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
51  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
52  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
53  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
54  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
55  import org.apache.hadoop.hbase.util.Pair;
56  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
57  import org.apache.hadoop.security.UserGroupInformation;
58  
59  @InterfaceAudience.Private
60  public class EnableTableProcedure
61      extends StateMachineProcedure<MasterProcedureEnv, EnableTableState>
62      implements TableProcedureInterface {
63    private static final Log LOG = LogFactory.getLog(EnableTableProcedure.class);
64  
65    private final AtomicBoolean aborted = new AtomicBoolean(false);
66  
67    // This is for back compatible with 1.0 asynchronized operations.
68    private final ProcedurePrepareLatch syncLatch;
69  
70    private TableName tableName;
71    private boolean skipTableStateCheck;
72    private UserGroupInformation user;
73  
74    private Boolean traceEnabled = null;
75  
76    public EnableTableProcedure() {
77      syncLatch = null;
78    }
79  
80    /**
81     * Constructor
82     * @param env MasterProcedureEnv
83     * @param tableName the table to operate on
84     * @param skipTableStateCheck whether to check table state
85     * @throws IOException
86     */
87    public EnableTableProcedure(
88        final MasterProcedureEnv env,
89        final TableName tableName,
90        final boolean skipTableStateCheck) throws IOException {
91      this(env, tableName, skipTableStateCheck, null);
92    }
93  
94    /**
95     * Constructor
96     * @param env MasterProcedureEnv
97     * @throws IOException
98     * @param tableName the table to operate on
99     * @param skipTableStateCheck whether to check table state
100    */
101   public EnableTableProcedure(
102       final MasterProcedureEnv env,
103       final TableName tableName,
104       final boolean skipTableStateCheck,
105       final ProcedurePrepareLatch syncLatch) throws IOException {
106     this.tableName = tableName;
107     this.skipTableStateCheck = skipTableStateCheck;
108     this.user = env.getRequestUser().getUGI();
109     this.setOwner(this.user.getShortUserName());
110 
111     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
112     // compatible with 1.0 asynchronized operations. We need to lock the table and check
113     // whether the Enable operation could be performed (table exists and offline; table state
114     // is DISABLED). Once it is done, we are good to release the latch and the client can
115     // start asynchronously wait for the operation.
116     //
117     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
118     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
119     this.syncLatch = syncLatch;
120   }
121 
122   @Override
123   protected Flow executeFromState(final MasterProcedureEnv env, final EnableTableState state) {
124     if (isTraceEnabled()) {
125       LOG.trace(this + " execute state=" + state);
126     }
127 
128     try {
129       switch (state) {
130       case ENABLE_TABLE_PREPARE:
131         if (prepareEnable(env)) {
132           setNextState(EnableTableState.ENABLE_TABLE_PRE_OPERATION);
133         } else {
134           assert isFailed() : "enable should have an exception here";
135           return Flow.NO_MORE_STATE;
136         }
137         break;
138       case ENABLE_TABLE_PRE_OPERATION:
139         preEnable(env, state);
140         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLING_TABLE_STATE);
141         break;
142       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
143         setTableStateToEnabling(env, tableName);
144         setNextState(EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE);
145         break;
146       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
147         markRegionsOnline(env, tableName, true);
148         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLED_TABLE_STATE);
149         break;
150       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
151         setTableStateToEnabled(env, tableName);
152         setNextState(EnableTableState.ENABLE_TABLE_POST_OPERATION);
153         break;
154       case ENABLE_TABLE_POST_OPERATION:
155         postEnable(env, state);
156         return Flow.NO_MORE_STATE;
157       default:
158         throw new UnsupportedOperationException("unhandled state=" + state);
159       }
160     } catch (InterruptedException|HBaseException|IOException e) {
161       LOG.error("Error trying to enable table=" + tableName + " state=" + state, e);
162       setFailure("master-enable-table", e);
163     }
164     return Flow.HAS_MORE_STATE;
165   }
166 
167   @Override
168   protected void rollbackState(final MasterProcedureEnv env, final EnableTableState state)
169       throws IOException {
170     if (isTraceEnabled()) {
171       LOG.trace(this + " rollback state=" + state);
172     }
173     try {
174       switch (state) {
175       case ENABLE_TABLE_POST_OPERATION:
176         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.preDisable())?
177         break;
178       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
179         DisableTableProcedure.setTableStateToDisabling(env, tableName);
180         break;
181       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
182         markRegionsOfflineDuringRecovery(env);
183         break;
184       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
185         DisableTableProcedure.setTableStateToDisabled(env, tableName);
186         break;
187       case ENABLE_TABLE_PRE_OPERATION:
188         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.postDisable())?
189         break;
190       case ENABLE_TABLE_PREPARE:
191         // Nothing to undo for this state.
192         // We do need to count down the latch count so that we don't stuck.
193         ProcedurePrepareLatch.releaseLatch(syncLatch, this);
194         break;
195       default:
196         throw new UnsupportedOperationException("unhandled state=" + state);
197       }
198     } catch (HBaseException e) {
199       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
200       throw new IOException(e);
201     } catch (IOException e) {
202       // This will be retried. Unless there is a bug in the code,
203       // this should be just a "temporary error" (e.g. network down)
204       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
205       throw e;
206     }
207   }
208 
209   @Override
210   protected EnableTableState getState(final int stateId) {
211     return EnableTableState.valueOf(stateId);
212   }
213 
214   @Override
215   protected int getStateId(final EnableTableState state) {
216     return state.getNumber();
217   }
218 
219   @Override
220   protected EnableTableState getInitialState() {
221     return EnableTableState.ENABLE_TABLE_PREPARE;
222   }
223 
224   @Override
225   protected void setNextState(final EnableTableState state) {
226     if (aborted.get()) {
227       setAbortFailure("Enable-table", "abort requested");
228     } else {
229       super.setNextState(state);
230     }
231   }
232 
233   @Override
234   public boolean abort(final MasterProcedureEnv env) {
235     aborted.set(true);
236     return true;
237   }
238 
239   @Override
240   protected boolean acquireLock(final MasterProcedureEnv env) {
241     if (!env.isInitialized()) return false;
242     return env.getProcedureQueue().tryAcquireTableWrite(
243       tableName,
244       EventType.C_M_ENABLE_TABLE.toString());
245   }
246 
247   @Override
248   protected void releaseLock(final MasterProcedureEnv env) {
249     env.getProcedureQueue().releaseTableWrite(tableName);
250   }
251 
252   @Override
253   public void serializeStateData(final OutputStream stream) throws IOException {
254     super.serializeStateData(stream);
255 
256     MasterProcedureProtos.EnableTableStateData.Builder enableTableMsg =
257         MasterProcedureProtos.EnableTableStateData.newBuilder()
258             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
259             .setTableName(ProtobufUtil.toProtoTableName(tableName))
260             .setSkipTableStateCheck(skipTableStateCheck);
261 
262     enableTableMsg.build().writeDelimitedTo(stream);
263   }
264 
265   @Override
266   public void deserializeStateData(final InputStream stream) throws IOException {
267     super.deserializeStateData(stream);
268 
269     MasterProcedureProtos.EnableTableStateData enableTableMsg =
270         MasterProcedureProtos.EnableTableStateData.parseDelimitedFrom(stream);
271     user = MasterProcedureUtil.toUserInfo(enableTableMsg.getUserInfo());
272     tableName = ProtobufUtil.toTableName(enableTableMsg.getTableName());
273     skipTableStateCheck = enableTableMsg.getSkipTableStateCheck();
274   }
275 
276   @Override
277   public void toStringClassDetails(StringBuilder sb) {
278     sb.append(getClass().getSimpleName());
279     sb.append(" (table=");
280     sb.append(tableName);
281     sb.append(")");
282   }
283 
284   @Override
285   public TableName getTableName() {
286     return tableName;
287   }
288 
289   @Override
290   public TableOperationType getTableOperationType() {
291     return TableOperationType.ENABLE;
292   }
293 
294 
295   /**
296    * Action before any real action of enabling table. Set the exception in the procedure instead
297    * of throwing it.  This approach is to deal with backward compatible with 1.0.
298    * @param env MasterProcedureEnv
299    * @return whether the table passes the necessary checks
300    * @throws IOException
301    */
302   private boolean prepareEnable(final MasterProcedureEnv env) throws IOException {
303     boolean canTableBeEnabled = true;
304 
305     // Check whether table exists
306     if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
307       setFailure("master-enable-table", new TableNotFoundException(tableName));
308       canTableBeEnabled = false;
309     } else if (!skipTableStateCheck) {
310       // There could be multiple client requests trying to disable or enable
311       // the table at the same time. Ensure only the first request is honored
312       // After that, no other requests can be accepted until the table reaches
313       // DISABLED or ENABLED.
314       //
315       // Note: in 1.0 release, we called TableStateManager.setTableStateIfInStates() to set
316       // the state to ENABLING from DISABLED. The implementation was done before table lock
317       // was implemented. With table lock, there is no need to set the state here (it will
318       // set the state later on). A quick state check should be enough for us to move forward.
319       TableStateManager tsm = env.getMasterServices().getAssignmentManager().getTableStateManager();
320       if (!tsm.isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
321         LOG.info("Table " + tableName + " isn't disabled; skipping enable");
322         setFailure("master-enable-table", new TableNotDisabledException(this.tableName));
323         canTableBeEnabled = false;
324       }
325     }
326 
327     // We are done the check. Future actions in this procedure could be done asynchronously.
328     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
329 
330     return canTableBeEnabled;
331   }
332 
333   /**
334    * Action before enabling table.
335    * @param env MasterProcedureEnv
336    * @param state the procedure state
337    * @throws IOException
338    * @throws InterruptedException
339    */
340   private void preEnable(final MasterProcedureEnv env, final EnableTableState state)
341       throws IOException, InterruptedException {
342     runCoprocessorAction(env, state);
343   }
344 
345   /**
346    * Mark table state to Enabling
347    * @param env MasterProcedureEnv
348    * @param tableName the target table
349    * @throws IOException
350    */
351   protected static void setTableStateToEnabling(
352       final MasterProcedureEnv env,
353       final TableName tableName) throws HBaseException, IOException {
354     // Set table disabling flag up in zk.
355     LOG.info("Attempting to enable the table " + tableName);
356     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
357       tableName,
358       ZooKeeperProtos.Table.State.ENABLING);
359   }
360 
361   /**
362    * Mark offline regions of the table online with retry
363    * @param env MasterProcedureEnv
364    * @param tableName the target table
365    * @param retryRequired whether to retry if the first run failed
366    * @throws IOException
367    */
368   protected static void markRegionsOnline(
369       final MasterProcedureEnv env,
370       final TableName tableName,
371       final Boolean retryRequired) throws IOException {
372     // This is best effort approach to make all regions of a table online.  If we fail to do
373     // that, it is ok that the table has some offline regions; user can fix it manually.
374 
375     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
376     int maxTry = (retryRequired ? 10 : 1);
377     boolean done = false;
378 
379     do {
380       try {
381         done = markRegionsOnline(env, tableName);
382         if (done) {
383           break;
384         }
385         maxTry--;
386       } catch (Exception e) {
387         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
388         maxTry--;
389         if (maxTry > 0) {
390           continue; // we still have some retry left, try again.
391         }
392         throw e;
393       }
394     } while (maxTry > 0);
395 
396     if (!done) {
397       LOG.warn("Some or all regions of the Table '" + tableName + "' were offline");
398     }
399   }
400 
401   /**
402    * Mark offline regions of the table online
403    * @param env MasterProcedureEnv
404    * @param tableName the target table
405    * @return whether the operation is fully completed or being interrupted.
406    * @throws IOException
407    */
408   private static boolean markRegionsOnline(final MasterProcedureEnv env, final TableName tableName)
409       throws IOException {
410     final AssignmentManager assignmentManager = env.getMasterServices().getAssignmentManager();
411     final MasterServices masterServices = env.getMasterServices();
412     final ServerManager serverManager = masterServices.getServerManager();
413     boolean done = false;
414     // Get the regions of this table. We're done when all listed
415     // tables are onlined.
416     List<Pair<HRegionInfo, ServerName>> tableRegionsAndLocations;
417 
418     if (TableName.META_TABLE_NAME.equals(tableName)) {
419       tableRegionsAndLocations =
420           new MetaTableLocator().getMetaRegionsAndLocations(masterServices.getZooKeeper());
421     } else {
422       tableRegionsAndLocations =
423           MetaTableAccessor.getTableRegionsAndLocations(
424             masterServices.getZooKeeper(), masterServices.getConnection(), tableName, true);
425     }
426 
427     int countOfRegionsInTable = tableRegionsAndLocations.size();
428     Map<HRegionInfo, ServerName> regionsToAssign =
429         regionsToAssignWithServerName(env, tableRegionsAndLocations);
430 
431     // need to potentially create some regions for the replicas
432     List<HRegionInfo> unrecordedReplicas =
433         AssignmentManager.replicaRegionsNotRecordedInMeta(new HashSet<HRegionInfo>(
434             regionsToAssign.keySet()), masterServices);
435     Map<ServerName, List<HRegionInfo>> srvToUnassignedRegs =
436         assignmentManager.getBalancer().roundRobinAssignment(unrecordedReplicas,
437           serverManager.getOnlineServersList());
438     if (srvToUnassignedRegs != null) {
439       for (Map.Entry<ServerName, List<HRegionInfo>> entry : srvToUnassignedRegs.entrySet()) {
440         for (HRegionInfo h : entry.getValue()) {
441           regionsToAssign.put(h, entry.getKey());
442         }
443       }
444     }
445 
446     int offlineRegionsCount = regionsToAssign.size();
447 
448     LOG.info("Table '" + tableName + "' has " + countOfRegionsInTable + " regions, of which "
449         + offlineRegionsCount + " are offline.");
450     if (offlineRegionsCount == 0) {
451       return true;
452     }
453 
454     List<ServerName> onlineServers = serverManager.createDestinationServersList();
455     Map<ServerName, List<HRegionInfo>> bulkPlan =
456         env.getMasterServices().getAssignmentManager().getBalancer()
457             .retainAssignment(regionsToAssign, onlineServers);
458     if (bulkPlan != null) {
459       LOG.info("Bulk assigning " + offlineRegionsCount + " region(s) across " + bulkPlan.size()
460           + " server(s), retainAssignment=true");
461 
462       BulkAssigner ba = new GeneralBulkAssigner(masterServices, bulkPlan, assignmentManager, true);
463       try {
464         if (ba.bulkAssign()) {
465           done = true;
466         }
467       } catch (InterruptedException e) {
468         LOG.warn("Enable operation was interrupted when enabling table '" + tableName + "'");
469         // Preserve the interrupt.
470         Thread.currentThread().interrupt();
471       }
472     } else {
473       LOG.info("Balancer was unable to find suitable servers for table " + tableName
474           + ", leaving unassigned");
475     }
476     return done;
477   }
478 
479   /**
480    * Mark regions of the table offline during recovery
481    * @param env MasterProcedureEnv
482    */
483   private void markRegionsOfflineDuringRecovery(final MasterProcedureEnv env) {
484     try {
485       // This is a best effort attempt. We will move on even it does not succeed. We will retry
486       // several times until we giving up.
487       DisableTableProcedure.markRegionsOffline(env, tableName, true);
488     } catch (Exception e) {
489       LOG.debug("Failed to offline all regions of table " + tableName + ". Ignoring", e);
490     }
491   }
492 
493   /**
494    * Mark table state to Enabled
495    * @param env MasterProcedureEnv
496    * @throws IOException
497    */
498   protected static void setTableStateToEnabled(
499       final MasterProcedureEnv env,
500       final TableName tableName) throws HBaseException, IOException {
501     // Flip the table to Enabled
502     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
503       tableName,
504       ZooKeeperProtos.Table.State.ENABLED);
505     LOG.info("Table '" + tableName + "' was successfully enabled.");
506   }
507 
508   /**
509    * Action after enabling table.
510    * @param env MasterProcedureEnv
511    * @param state the procedure state
512    * @throws IOException
513    * @throws InterruptedException
514    */
515   private void postEnable(final MasterProcedureEnv env, final EnableTableState state)
516       throws IOException, InterruptedException {
517     runCoprocessorAction(env, state);
518   }
519 
520   /**
521    * The procedure could be restarted from a different machine. If the variable is null, we need to
522    * retrieve it.
523    * @return traceEnabled
524    */
525   private Boolean isTraceEnabled() {
526     if (traceEnabled == null) {
527       traceEnabled = LOG.isTraceEnabled();
528     }
529     return traceEnabled;
530   }
531 
532   /**
533    * @param regionsInMeta
534    * @return List of regions neither in transition nor assigned.
535    * @throws IOException
536    */
537   private static Map<HRegionInfo, ServerName> regionsToAssignWithServerName(
538       final MasterProcedureEnv env,
539       final List<Pair<HRegionInfo, ServerName>> regionsInMeta) throws IOException {
540     Map<HRegionInfo, ServerName> regionsToAssign =
541         new HashMap<HRegionInfo, ServerName>(regionsInMeta.size());
542     RegionStates regionStates = env.getMasterServices().getAssignmentManager().getRegionStates();
543     for (Pair<HRegionInfo, ServerName> regionLocation : regionsInMeta) {
544       HRegionInfo hri = regionLocation.getFirst();
545       ServerName sn = regionLocation.getSecond();
546       if (regionStates.isRegionOffline(hri)) {
547         regionsToAssign.put(hri, sn);
548       } else {
549         if (LOG.isDebugEnabled()) {
550           LOG.debug("Skipping assign for the region " + hri + " during enable table "
551               + hri.getTable() + " because its already in tranition or assigned.");
552         }
553       }
554     }
555     return regionsToAssign;
556   }
557 
558   /**
559    * Coprocessor Action.
560    * @param env MasterProcedureEnv
561    * @param state the procedure state
562    * @throws IOException
563    * @throws InterruptedException
564    */
565   private void runCoprocessorAction(final MasterProcedureEnv env, final EnableTableState state)
566       throws IOException, InterruptedException {
567     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
568     if (cpHost != null) {
569       user.doAs(new PrivilegedExceptionAction<Void>() {
570         @Override
571         public Void run() throws Exception {
572           switch (state) {
573           case ENABLE_TABLE_PRE_OPERATION:
574             cpHost.preEnableTableHandler(getTableName());
575             break;
576           case ENABLE_TABLE_POST_OPERATION:
577             cpHost.postEnableTableHandler(getTableName());
578             break;
579           default:
580             throw new UnsupportedOperationException(this + " unhandled state=" + state);
581           }
582           return null;
583         }
584       });
585     }
586   }
587 }