View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.List;
26  import java.util.concurrent.ExecutorService;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.MetaTableAccessor;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.TableNotEnabledException;
35  import org.apache.hadoop.hbase.TableNotFoundException;
36  import org.apache.hadoop.hbase.TableStateManager;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.constraint.ConstraintException;
39  import org.apache.hadoop.hbase.executor.EventType;
40  import org.apache.hadoop.hbase.exceptions.HBaseException;
41  import org.apache.hadoop.hbase.master.AssignmentManager;
42  import org.apache.hadoop.hbase.master.BulkAssigner;
43  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
44  import org.apache.hadoop.hbase.master.RegionState;
45  import org.apache.hadoop.hbase.master.RegionStates;
46  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
47  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
48  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
49  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
50  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
51  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
52  import org.apache.hadoop.security.UserGroupInformation;
53  import org.apache.htrace.Trace;
54  
55  @InterfaceAudience.Private
56  public class DisableTableProcedure
57      extends StateMachineProcedure<MasterProcedureEnv, DisableTableState>
58      implements TableProcedureInterface {
59    private static final Log LOG = LogFactory.getLog(DisableTableProcedure.class);
60  
61    private final AtomicBoolean aborted = new AtomicBoolean(false);
62  
63    // This is for back compatible with 1.0 asynchronized operations.
64    private final ProcedurePrepareLatch syncLatch;
65  
66    private TableName tableName;
67    private boolean skipTableStateCheck;
68    private UserGroupInformation user;
69  
70    private Boolean traceEnabled = null;
71  
/** Outcome of one attempt to take every online region of the table offline. */
enum MarkRegionOfflineOpResult {
  /** No online regions were found, or the bulk operation reported completion. */
  MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL,

  /** The bulk operation finished without reporting completion. */
  BULK_ASSIGN_REGIONS_FAILED,

  /** The calling thread was interrupted while the bulk operation was in progress. */
  MARK_ALL_REGIONS_OFFLINE_INTERRUPTED
}
77  
/**
 * No-argument constructor. Only the latch is initialised (to null); the remaining
 * fields are populated later, e.g. by {@link #deserializeStateData(InputStream)}.
 */
public DisableTableProcedure() {
  this.syncLatch = null;
}
81  
/**
 * Creates a disable-table procedure that has no prepare latch attached.
 * @param env MasterProcedureEnv
 * @param tableName the table to operate on
 * @param skipTableStateCheck true to bypass the table-state check in the prepare step
 * @throws IOException declared by the delegated constructor
 */
public DisableTableProcedure(
    final MasterProcedureEnv env,
    final TableName tableName,
    final boolean skipTableStateCheck) throws IOException {
  // Delegate with a null latch: nobody is waiting on the prepare step.
  this(env, tableName, skipTableStateCheck, null);
}
95  
96    /**
97     * Constructor
98     * @param env MasterProcedureEnv
99     * @param tableName the table to operate on
100    * @param skipTableStateCheck whether to check table state
101    * @throws IOException
102    */
103   public DisableTableProcedure(
104       final MasterProcedureEnv env,
105       final TableName tableName,
106       final boolean skipTableStateCheck,
107       final ProcedurePrepareLatch syncLatch) throws IOException {
108     this.tableName = tableName;
109     this.skipTableStateCheck = skipTableStateCheck;
110     this.user = env.getRequestUser().getUGI();
111     this.setOwner(this.user.getShortUserName());
112 
113     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
114     // compatible with 1.0 asynchronized operations. We need to lock the table and check
115     // whether the Disable operation could be performed (table exists and online; table state
116     // is ENABLED). Once it is done, we are good to release the latch and the client can
117     // start asynchronously wait for the operation.
118     //
119     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
120     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
121     this.syncLatch = syncLatch;
122   }
123 
124   @Override
125   protected Flow executeFromState(final MasterProcedureEnv env, final DisableTableState state) {
126     if (isTraceEnabled()) {
127       LOG.trace(this + " execute state=" + state);
128     }
129 
130     try {
131       switch (state) {
132       case DISABLE_TABLE_PREPARE:
133         if (prepareDisable(env)) {
134           setNextState(DisableTableState.DISABLE_TABLE_PRE_OPERATION);
135         } else {
136           assert isFailed() : "disable should have an exception here";
137           return Flow.NO_MORE_STATE;
138         }
139         break;
140       case DISABLE_TABLE_PRE_OPERATION:
141         preDisable(env, state);
142         setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLING_TABLE_STATE);
143         break;
144       case DISABLE_TABLE_SET_DISABLING_TABLE_STATE:
145         setTableStateToDisabling(env, tableName);
146         setNextState(DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE);
147         break;
148       case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
149         if (markRegionsOffline(env, tableName, true) ==
150             MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
151           setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLED_TABLE_STATE);
152         } else {
153           LOG.trace("Retrying later to disable the missing regions");
154         }
155         break;
156       case DISABLE_TABLE_SET_DISABLED_TABLE_STATE:
157         setTableStateToDisabled(env, tableName);
158         setNextState(DisableTableState.DISABLE_TABLE_POST_OPERATION);
159         break;
160       case DISABLE_TABLE_POST_OPERATION:
161         postDisable(env, state);
162         return Flow.NO_MORE_STATE;
163       default:
164         throw new UnsupportedOperationException("unhandled state=" + state);
165       }
166     } catch (InterruptedException|HBaseException|IOException e) {
167       LOG.warn("Retriable error trying to disable table=" + tableName + " state=" + state, e);
168     }
169     return Flow.HAS_MORE_STATE;
170   }
171 
172   @Override
173   protected void rollbackState(final MasterProcedureEnv env, final DisableTableState state)
174       throws IOException {
175     if (state == DisableTableState.DISABLE_TABLE_PREPARE) {
176       undoTableStateChange(env);
177       ProcedurePrepareLatch.releaseLatch(syncLatch, this);
178       return;
179     }
180 
181     // The delete doesn't have a rollback. The execution will succeed, at some point.
182     throw new UnsupportedOperationException("unhandled state=" + state);
183   }
184 
185   @Override
186   protected DisableTableState getState(final int stateId) {
187     return DisableTableState.valueOf(stateId);
188   }
189 
190   @Override
191   protected int getStateId(final DisableTableState state) {
192     return state.getNumber();
193   }
194 
195   @Override
196   protected DisableTableState getInitialState() {
197     return DisableTableState.DISABLE_TABLE_PREPARE;
198   }
199 
200   @Override
201   protected void setNextState(final DisableTableState state) {
202     if (aborted.get()) {
203       setAbortFailure("disable-table", "abort requested");
204     } else {
205       super.setNextState(state);
206     }
207   }
208 
209   @Override
210   public boolean abort(final MasterProcedureEnv env) {
211     aborted.set(true);
212     return true;
213   }
214 
215   @Override
216   protected boolean acquireLock(final MasterProcedureEnv env) {
217     if (!env.isInitialized()) return false;
218     return env.getProcedureQueue().tryAcquireTableWrite(
219       tableName,
220       EventType.C_M_DISABLE_TABLE.toString());
221   }
222 
223   @Override
224   protected void releaseLock(final MasterProcedureEnv env) {
225     env.getProcedureQueue().releaseTableWrite(tableName);
226   }
227 
228   @Override
229   public void serializeStateData(final OutputStream stream) throws IOException {
230     super.serializeStateData(stream);
231 
232     MasterProcedureProtos.DisableTableStateData.Builder disableTableMsg =
233         MasterProcedureProtos.DisableTableStateData.newBuilder()
234             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
235             .setTableName(ProtobufUtil.toProtoTableName(tableName))
236             .setSkipTableStateCheck(skipTableStateCheck);
237 
238     disableTableMsg.build().writeDelimitedTo(stream);
239   }
240 
241   @Override
242   public void deserializeStateData(final InputStream stream) throws IOException {
243     super.deserializeStateData(stream);
244 
245     MasterProcedureProtos.DisableTableStateData disableTableMsg =
246         MasterProcedureProtos.DisableTableStateData.parseDelimitedFrom(stream);
247     user = MasterProcedureUtil.toUserInfo(disableTableMsg.getUserInfo());
248     tableName = ProtobufUtil.toTableName(disableTableMsg.getTableName());
249     skipTableStateCheck = disableTableMsg.getSkipTableStateCheck();
250   }
251 
252   @Override
253   public void toStringClassDetails(StringBuilder sb) {
254     sb.append(getClass().getSimpleName());
255     sb.append(" (table=");
256     sb.append(tableName);
257     sb.append(")");
258   }
259 
260   @Override
261   public TableName getTableName() {
262     return tableName;
263   }
264 
265   @Override
266   public TableOperationType getTableOperationType() {
267     return TableOperationType.DISABLE;
268   }
269 
270   /**
271    * Action before any real action of disabling table. Set the exception in the procedure instead
272    * of throwing it.  This approach is to deal with backward compatible with 1.0.
273    * @param env MasterProcedureEnv
274    * @throws HBaseException
275    * @throws IOException
276    */
277   private boolean prepareDisable(final MasterProcedureEnv env) throws HBaseException, IOException {
278     boolean canTableBeDisabled = true;
279     if (tableName.equals(TableName.META_TABLE_NAME)) {
280       setFailure("master-disable-table", new ConstraintException("Cannot disable catalog table"));
281       canTableBeDisabled = false;
282     } else if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
283       setFailure("master-disable-table", new TableNotFoundException(tableName));
284       canTableBeDisabled = false;
285     } else if (!skipTableStateCheck) {
286       // There could be multiple client requests trying to disable or enable
287       // the table at the same time. Ensure only the first request is honored
288       // After that, no other requests can be accepted until the table reaches
289       // DISABLED or ENABLED.
290       //
291       // Note: A quick state check should be enough for us to move forward. However, instead of
292       // calling TableStateManager.isTableState() to just check the state, we called
293       // TableStateManager.setTableStateIfInStates() to set the state to DISABLING from ENABLED.
294       // This is because we treat empty state as enabled from 0.92-clusters. See
295       // ZKTableStateManager.setTableStateIfInStates() that has a hack solution to work around
296       // this issue.
297       TableStateManager tsm =
298         env.getMasterServices().getAssignmentManager().getTableStateManager();
299       if (!tsm.setTableStateIfInStates(tableName, ZooKeeperProtos.Table.State.DISABLING,
300             ZooKeeperProtos.Table.State.DISABLING, ZooKeeperProtos.Table.State.ENABLED)) {
301         LOG.info("Table " + tableName + " isn't enabled; skipping disable");
302         setFailure("master-disable-table", new TableNotEnabledException(tableName));
303         canTableBeDisabled = false;
304       }
305     }
306 
307     // We are done the check. Future actions in this procedure could be done asynchronously.
308     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
309 
310     return canTableBeDisabled;
311   }
312 
313   /**
314    * Rollback of table state change in prepareDisable()
315    * @param env MasterProcedureEnv
316    */
317   private void undoTableStateChange(final MasterProcedureEnv env) {
318     if (!skipTableStateCheck) {
319       try {
320         // If the state was changed, undo it.
321         if (env.getMasterServices().getAssignmentManager().getTableStateManager().isTableState(
322             tableName, ZooKeeperProtos.Table.State.DISABLING)) {
323           EnableTableProcedure.setTableStateToEnabled(env, tableName);
324         }
325       } catch (Exception e) {
326         // Ignore exception.
327       }
328     }
329   }
330 
331   /**
332    * Action before disabling table.
333    * @param env MasterProcedureEnv
334    * @param state the procedure state
335    * @throws IOException
336    * @throws InterruptedException
337    */
338   protected void preDisable(final MasterProcedureEnv env, final DisableTableState state)
339       throws IOException, InterruptedException {
340     runCoprocessorAction(env, state);
341   }
342 
343   /**
344    * Mark table state to Disabling
345    * @param env MasterProcedureEnv
346    * @throws IOException
347    */
348   protected static void setTableStateToDisabling(
349       final MasterProcedureEnv env,
350       final TableName tableName) throws HBaseException, IOException {
351     // Set table disabling flag up in zk.
352     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
353       tableName,
354       ZooKeeperProtos.Table.State.DISABLING);
355   }
356 
357   /**
358    * Mark regions of the table offline with retries
359    * @param env MasterProcedureEnv
360    * @param tableName the target table
361    * @param retryRequired whether to retry if the first run failed
362    * @return whether the operation is fully completed or being interrupted.
363    * @throws IOException
364    */
365   protected static MarkRegionOfflineOpResult markRegionsOffline(
366       final MasterProcedureEnv env,
367       final TableName tableName,
368       final Boolean retryRequired) throws IOException {
369     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
370     int maxTry = (retryRequired ? 10 : 1);
371     MarkRegionOfflineOpResult operationResult =
372         MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
373     do {
374       try {
375         operationResult = markRegionsOffline(env, tableName);
376         if (operationResult == MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
377           break;
378         }
379         maxTry--;
380       } catch (Exception e) {
381         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
382         maxTry--;
383         if (maxTry > 0) {
384           continue; // we still have some retry left, try again.
385         }
386         throw e;
387       }
388     } while (maxTry > 0);
389 
390     if (operationResult != MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
391       LOG.warn("Some or all regions of the Table '" + tableName + "' were still online");
392     }
393 
394     return operationResult;
395   }
396 
397   /**
398    * Mark regions of the table offline
399    * @param env MasterProcedureEnv
400    * @param tableName the target table
401    * @return whether the operation is fully completed or being interrupted.
402    * @throws IOException
403    */
404   private static MarkRegionOfflineOpResult markRegionsOffline(
405       final MasterProcedureEnv env,
406       final TableName tableName) throws IOException {
407     // Get list of online regions that are of this table.  Regions that are
408     // already closed will not be included in this list; i.e. the returned
409     // list is not ALL regions in a table, its all online regions according
410     // to the in-memory state on this master.
411     MarkRegionOfflineOpResult operationResult =
412         MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL;
413     final List<HRegionInfo> regions =
414         env.getMasterServices().getAssignmentManager().getRegionStates()
415             .getRegionsOfTable(tableName);
416     if (regions.size() > 0) {
417       LOG.info("Offlining " + regions.size() + " regions.");
418 
419       BulkDisabler bd = new BulkDisabler(env, tableName, regions);
420       try {
421         if (!bd.bulkAssign()) {
422           operationResult = MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
423         }
424       } catch (InterruptedException e) {
425         LOG.warn("Disable was interrupted");
426         // Preserve the interrupt.
427         Thread.currentThread().interrupt();
428         operationResult = MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_INTERRUPTED;
429       }
430     }
431     return operationResult;
432   }
433 
434   /**
435    * Mark table state to Disabled
436    * @param env MasterProcedureEnv
437    * @throws IOException
438    */
439   protected static void setTableStateToDisabled(
440       final MasterProcedureEnv env,
441       final TableName tableName) throws HBaseException, IOException {
442     // Flip the table to disabled
443     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
444       tableName,
445       ZooKeeperProtos.Table.State.DISABLED);
446     LOG.info("Disabled table, " + tableName + ", is completed.");
447   }
448 
449   /**
450    * Action after disabling table.
451    * @param env MasterProcedureEnv
452    * @param state the procedure state
453    * @throws IOException
454    * @throws InterruptedException
455    */
456   protected void postDisable(final MasterProcedureEnv env, final DisableTableState state)
457       throws IOException, InterruptedException {
458     runCoprocessorAction(env, state);
459   }
460 
461   /**
462    * The procedure could be restarted from a different machine. If the variable is null, we need to
463    * retrieve it.
464    * @return traceEnabled
465    */
466   private Boolean isTraceEnabled() {
467     if (traceEnabled == null) {
468       traceEnabled = LOG.isTraceEnabled();
469     }
470     return traceEnabled;
471   }
472 
473   /**
474    * Coprocessor Action.
475    * @param env MasterProcedureEnv
476    * @param state the procedure state
477    * @throws IOException
478    * @throws InterruptedException
479    */
480   private void runCoprocessorAction(final MasterProcedureEnv env, final DisableTableState state)
481       throws IOException, InterruptedException {
482     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
483     if (cpHost != null) {
484       user.doAs(new PrivilegedExceptionAction<Void>() {
485         @Override
486         public Void run() throws Exception {
487           switch (state) {
488           case DISABLE_TABLE_PRE_OPERATION:
489             cpHost.preDisableTableHandler(tableName);
490             break;
491           case DISABLE_TABLE_POST_OPERATION:
492             cpHost.postDisableTableHandler(tableName);
493             break;
494           default:
495             throw new UnsupportedOperationException(this + " unhandled state=" + state);
496           }
497           return null;
498         }
499       });
500     }
501   }
502 
503   /**
504    * Run bulk disable.
505    */
506   private static class BulkDisabler extends BulkAssigner {
507     private final AssignmentManager assignmentManager;
508     private final List<HRegionInfo> regions;
509     private final TableName tableName;
510     private final int waitingTimeForEvents;
511 
512     public BulkDisabler(final MasterProcedureEnv env, final TableName tableName,
513         final List<HRegionInfo> regions) {
514       super(env.getMasterServices());
515       this.assignmentManager = env.getMasterServices().getAssignmentManager();
516       this.tableName = tableName;
517       this.regions = regions;
518       this.waitingTimeForEvents =
519           env.getMasterServices().getConfiguration()
520               .getInt("hbase.master.event.waiting.time", 1000);
521     }
522 
523     @Override
524     protected void populatePool(ExecutorService pool) {
525       RegionStates regionStates = assignmentManager.getRegionStates();
526       for (final HRegionInfo region : regions) {
527         if (regionStates.isRegionInTransition(region)
528             && !regionStates.isRegionInState(region, RegionState.State.FAILED_CLOSE)) {
529           continue;
530         }
531         pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler", new Runnable() {
532           @Override
533           public void run() {
534             assignmentManager.unassign(region);
535           }
536         }));
537       }
538     }
539 
540     @Override
541     protected boolean waitUntilDone(long timeout) throws InterruptedException {
542       long startTime = EnvironmentEdgeManager.currentTime();
543       long remaining = timeout;
544       List<HRegionInfo> regions = null;
545       long lastLogTime = startTime;
546       while (!server.isStopped() && remaining > 0) {
547         Thread.sleep(waitingTimeForEvents);
548         regions = assignmentManager.getRegionStates().getRegionsOfTable(tableName);
549         long now = EnvironmentEdgeManager.currentTime();
550         // Don't log more than once every ten seconds. Its obnoxious. And only log table regions
551         // if we are waiting a while for them to go down...
552         if (LOG.isDebugEnabled() && ((now - lastLogTime) > 10000)) {
553           lastLogTime = now;
554           LOG.debug("Disable waiting until done; " + remaining + " ms remaining; " + regions);
555         }
556         if (regions.isEmpty()) break;
557         remaining = timeout - (now - startTime);
558       }
559       return regions != null && regions.isEmpty();
560     }
561   }
562 }