/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.backup.master;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupCopyService;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupRestoreServerFactory;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupException;
import org.apache.hadoop.hbase.backup.impl.BackupManager;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.backup.impl.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.util.BackupClientUtil;
import org.apache.hadoop.hbase.backup.util.BackupServerUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.hadoop.hbase.protobuf.generated.BackupProtos;
import org.apache.hadoop.hbase.protobuf.generated.BackupProtos.FullTableBackupState;
import org.apache.hadoop.hbase.protobuf.generated.BackupProtos.ServerTimestamp;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;

@InterfaceAudience.Private
public class FullTableBackupProcedure
    extends StateMachineProcedure<MasterProcedureEnv, FullTableBackupState>
    implements TableProcedureInterface {
  private static final Log LOG = LogFactory.getLog(FullTableBackupProcedure.class);

  private static final String SNAPSHOT_BACKUP_MAX_ATTEMPTS_KEY = "hbase.backup.snapshot.attempts.max";
  private static final int DEFAULT_SNAPSHOT_BACKUP_MAX_ATTEMPTS = 10;

  private static final String SNAPSHOT_BACKUP_ATTEMPTS_DELAY_KEY = "hbase.backup.snapshot.attempts.delay";
  private static final int DEFAULT_SNAPSHOT_BACKUP_ATTEMPTS_DELAY = 10000;
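
  // Both retry settings above are read from the master configuration, so they can be tuned in
  // hbase-site.xml. An illustrative override (values hypothetical):
  //
  //   <property>
  //     <name>hbase.backup.snapshot.attempts.max</name>
  //     <value>5</value>
  //   </property>
  //   <property>
  //     <name>hbase.backup.snapshot.attempts.delay</name>
  //     <value>30000</value>
  //   </property>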

  private final AtomicBoolean aborted = new AtomicBoolean(false);
  private Configuration conf;
  private String backupId;
  private List<TableName> tableList;
  private String targetRootDir;
  HashMap<String, Long> newTimestamps = null;

  private BackupManager backupManager;
  private BackupInfo backupInfo;

  public FullTableBackupProcedure() {
    // Required by the Procedure framework to create the procedure on replay
  }

  public FullTableBackupProcedure(final MasterProcedureEnv env,
      final String backupId, List<TableName> tableList, String targetRootDir, final int workers,
      final long bandwidth) throws IOException {
    backupManager = new BackupManager(env.getMasterConfiguration());
    this.backupId = backupId;
    this.tableList = tableList;
    this.targetRootDir = targetRootDir;
    backupInfo =
        backupManager.createBackupInfo(backupId, BackupType.FULL, tableList,
          targetRootDir, workers, bandwidth);
    if (tableList == null || tableList.isEmpty()) {
      this.tableList = new ArrayList<>(backupInfo.getTables());
    }
  }

  @Override
  public byte[] getResult() {
    return backupId.getBytes();
  }

  /**
   * Begin the overall backup.
   * @param backupManager backup manager
   * @param backupInfo backup context
   * @throws IOException exception
   */
  static void beginBackup(BackupManager backupManager, BackupInfo backupInfo)
      throws IOException {
    backupManager.setBackupInfo(backupInfo);
    // set the start timestamp of the overall backup
    long startTs = EnvironmentEdgeManager.currentTime();
    backupInfo.setStartTs(startTs);
    // set overall backup status: ongoing
    backupInfo.setState(BackupState.RUNNING);
    LOG.info("Backup " + backupInfo.getBackupId() + " started at " + startTs + ".");

    backupManager.updateBackupInfo(backupInfo);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Backup session " + backupInfo.getBackupId() + " has been started.");
    }
  }

  private static String getMessage(Exception e) {
    String msg = e.getMessage();
    if (msg == null || msg.isEmpty()) {
      msg = e.getClass().getName();
    }
    return msg;
  }

  /**
   * Delete HBase snapshots created for the full backup.
   * @param env master procedure environment
   * @param backupCtx backup context
   * @param conf configuration
   * @throws IOException exception
   */
  private static void deleteSnapshot(final MasterProcedureEnv env,
      BackupInfo backupCtx, Configuration conf)
      throws IOException {
    LOG.debug("Trying to delete snapshot for full backup.");
    for (String snapshotName : backupCtx.getSnapshotNames()) {
      if (snapshotName == null) {
        continue;
      }
      LOG.debug("Trying to delete snapshot: " + snapshotName);
      HBaseProtos.SnapshotDescription.Builder builder =
          HBaseProtos.SnapshotDescription.newBuilder();
      builder.setName(snapshotName);
      try {
        env.getMasterServices().getSnapshotManager().deleteSnapshot(builder.build());
      } catch (IOException ioe) {
        LOG.debug("Failed to delete snapshot " + snapshotName, ioe);
        continue;
      }
      LOG.debug("Deleting the snapshot " + snapshotName + " for backup "
          + backupCtx.getBackupId() + " succeeded.");
    }
  }

  /**
   * Clean up directories with prefix "exportSnapshot-", which are generated when exporting
   * snapshots.
   * @throws IOException exception
   */
  private static void cleanupExportSnapshotLog(Configuration conf) throws IOException {
    FileSystem fs = FSUtils.getCurrentFileSystem(conf);
    Path stagingDir =
        new Path(conf.get(BackupRestoreConstants.CONF_STAGING_ROOT, fs.getWorkingDirectory()
            .toString()));
    FileStatus[] files = FSUtils.listStatus(fs, stagingDir);
    if (files == null) {
      return;
    }
    for (FileStatus file : files) {
      if (file.getPath().getName().startsWith("exportSnapshot-")) {
        LOG.debug("Deleting log files of exported snapshot: " + file.getPath().getName());
        if (!FSUtils.delete(fs, file.getPath(), true)) {
          LOG.warn("Cannot delete " + file.getPath());
        }
      }
    }
  }

  /**
   * Clean up the uncompleted data at the target directory if the ongoing backup has already
   * entered the copy phase.
   */
  static void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
    try {
      // clean up the uncompleted data at the target directory if the ongoing backup has already
      // entered the copy phase
      LOG.debug("Trying to clean up target dir. Current backup phase: "
          + backupInfo.getPhase());
      if (backupInfo.getPhase().equals(BackupPhase.SNAPSHOTCOPY)
          || backupInfo.getPhase().equals(BackupPhase.INCREMENTAL_COPY)
          || backupInfo.getPhase().equals(BackupPhase.STORE_MANIFEST)) {
        FileSystem outputFs =
            FileSystem.get(new Path(backupInfo.getTargetRootDir()).toUri(), conf);

        // now treat one backup as a transaction, clean up data that has been partially copied at
        // table level
        for (TableName table : backupInfo.getTables()) {
          Path targetDirPath =
              new Path(HBackupFileSystem.getTableBackupDir(backupInfo.getTargetRootDir(),
                backupInfo.getBackupId(), table));
          if (outputFs.delete(targetDirPath, true)) {
            LOG.info("Cleaning up uncompleted backup data at " + targetDirPath.toString()
                + " done.");
          } else {
            LOG.info("No data has been copied to " + targetDirPath.toString() + ".");
          }

          Path tableDir = targetDirPath.getParent();
          FileStatus[] backups = FSUtils.listStatus(outputFs, tableDir);
          if (backups == null || backups.length == 0) {
            outputFs.delete(tableDir, true);
            LOG.debug(tableDir.toString() + " is empty, remove it.");
          }
        }
      }
    } catch (IOException e1) {
      LOG.error("Cleaning up uncompleted backup data of " + backupInfo.getBackupId() + " at "
          + backupInfo.getTargetRootDir() + " failed due to " + e1.getMessage() + ".");
    }
  }
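
  // Sketch of what cleanupTargetDir() removes, assuming HBackupFileSystem.getTableBackupDir()
  // resolves to a per-table directory under the backup root (backup id and table hypothetical):
  //
  //   <targetRootDir>/backup_1475096550000/default/usertable/   <- deleted per table
  //
  // The parent table directory is removed as well once it holds no other backups.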

  /**
   * Fail the overall backup.
   * @param env master procedure environment
   * @param backupInfo backup context
   * @param backupManager backup manager
   * @param e exception
   * @param msg message prefix for logging
   * @param type backup type
   * @param conf configuration
   * @throws IOException exception
   */
  static void failBackup(final MasterProcedureEnv env, BackupInfo backupInfo,
      BackupManager backupManager, Exception e,
      String msg, BackupType type, Configuration conf) throws IOException {
    LOG.error(msg + getMessage(e), e);
    // If this is a cancel exception, then we've already cleaned.

    // set the failure timestamp of the overall backup
    backupInfo.setEndTs(EnvironmentEdgeManager.currentTime());

    // set failure message
    backupInfo.setFailedMsg(e.getMessage());

    // set overall backup status: failed
    backupInfo.setState(BackupState.FAILED);

    // compose the backup failed data
    String backupFailedData =
        "BackupId=" + backupInfo.getBackupId() + ",startts=" + backupInfo.getStartTs()
            + ",failedts=" + backupInfo.getEndTs() + ",failedphase=" + backupInfo.getPhase()
            + ",failedmessage=" + backupInfo.getFailedMsg();
    LOG.error(backupFailedData);

    backupManager.updateBackupInfo(backupInfo);

    // if this is a full backup, delete any HBase snapshots that have already been taken,
    // and clean up export snapshot log files if they exist
    if (type == BackupType.FULL) {
      deleteSnapshot(env, backupInfo, conf);
      cleanupExportSnapshotLog(conf);
    }

    // clean up the uncompleted data at the target directory if the ongoing backup has already
    // entered the copy phase
    // For incremental backup, DistCp logs will be cleaned with the targetDir.
    cleanupTargetDir(backupInfo, conf);

    LOG.info("Backup " + backupInfo.getBackupId() + " failed.");
  }

  /**
   * Do snapshot copy.
   * @param backupInfo backup context
   * @throws Exception exception
   */
  private void snapshotCopy(BackupInfo backupInfo) throws Exception {
    LOG.info("Snapshot copy is starting.");

    // set overall backup phase: snapshot_copy
    backupInfo.setPhase(BackupPhase.SNAPSHOTCOPY);

    // call ExportSnapshot to copy files based on hbase snapshot for backup
    // ExportSnapshot only supports single-snapshot export, so loop over the tables
    BackupCopyService copyService = BackupRestoreServerFactory.getBackupCopyService(conf);

    // number of snapshots matches number of tables
    int numOfSnapshots = backupInfo.getSnapshotNames().size();

    LOG.debug("There are " + numOfSnapshots + " snapshots to be copied.");

    for (TableName table : backupInfo.getTables()) {
      // Currently we simply set the sub copy tasks by counting the table snapshot number; we
      // could calculate the real file sizes for the percentage in the future.
      // backupCopier.setSubTaskPercntgInWholeTask(1f / numOfSnapshots);
      String[] args = new String[4];
      args[0] = "-snapshot";
      args[1] = backupInfo.getSnapshotName(table);
      args[2] = "-copy-to";
      args[3] = backupInfo.getBackupStatus(table).getTargetDir();

      LOG.debug("Copy snapshot " + args[1] + " to " + args[3]);
      int res = copyService.copy(backupInfo, backupManager, conf, BackupCopyService.Type.FULL,
          args);
      // if one snapshot export fails, do not continue with the remaining snapshots
      if (res != 0) {
        LOG.error("Exporting Snapshot " + args[1] + " failed with return code: " + res + ".");

        throw new IOException("Failed to export snapshot " + args[1] + " to " + args[3]
            + " with return code " + res);
      }

      LOG.info("Snapshot copy " + args[1] + " finished.");
    }
  }
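
  // The copy above delegates to a BackupCopyService; for full backups this is conceptually the
  // same operation as running the ExportSnapshot tool by hand. An illustrative invocation
  // (snapshot name and destination hypothetical):
  //
  //   hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot \
  //     -snapshot snapshot_1475096550000_default_usertable \
  //     -copy-to hdfs://backup-cluster:8020/backup/backup_1475096550000/default/usertable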

  /**
   * Add manifest for the current backup. The manifest is stored
   * within the table backup directory.
   * @param backupInfo The current backup context
   * @throws IOException exception
   * @throws BackupException exception
   */
  private static void addManifest(BackupInfo backupInfo, BackupManager backupManager,
      BackupType type, Configuration conf) throws IOException, BackupException {
    // set the overall backup phase: store manifest
    backupInfo.setPhase(BackupPhase.STORE_MANIFEST);

    BackupManifest manifest;

    // Since we have each table's backup in its own directory structure,
    // we'll store its manifest with the table directory.
    for (TableName table : backupInfo.getTables()) {
      manifest = new BackupManifest(backupInfo, table);
      ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo, table);
      for (BackupImage image : ancestors) {
        manifest.addDependentImage(image);
      }

      if (type == BackupType.INCREMENTAL) {
        // We'll store the log timestamps for this table only in its manifest.
        HashMap<TableName, HashMap<String, Long>> tableTimestampMap =
            new HashMap<TableName, HashMap<String, Long>>();
        tableTimestampMap.put(table, backupInfo.getIncrTimestampMap().get(table));
        manifest.setIncrTimestampMap(tableTimestampMap);
        ArrayList<BackupImage> allAncestors = backupManager.getAncestors(backupInfo);
        for (BackupImage image : allAncestors) {
          manifest.addDependentImage(image);
        }
      }
      manifest.store(conf);
    }

    // For incremental backup, we store an overall manifest in
    // <backup-root-dir>/WALs/<backup-id>
    // This is used when creating the next incremental backup
    if (type == BackupType.INCREMENTAL) {
      manifest = new BackupManifest(backupInfo);
      // set the table region server start and end timestamps for incremental backup
      manifest.setIncrTimestampMap(backupInfo.getIncrTimestampMap());
      ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo);
      for (BackupImage image : ancestors) {
        manifest.addDependentImage(image);
      }
      manifest.store(conf);
    }
  }
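
  // Resulting manifest placement, as described in addManifest() above (backup id hypothetical):
  //   - one manifest per table, stored with that table's backup directory, e.g.
  //       <targetRootDir>/backup_1475096550000/default/t1/  (data files + manifest)
  //   - for incremental backups, one additional overall manifest under
  //       <backup-root-dir>/WALs/<backup-id>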

  /**
   * Get backup request metadata as a string.
   * @param backupInfo backup context
   * @return metadata string
   */
  private static String obtainBackupMetaDataStr(BackupInfo backupInfo) {
    StringBuilder sb = new StringBuilder();
    sb.append("type=" + backupInfo.getType() + ",tablelist=");
    for (TableName table : backupInfo.getTables()) {
      sb.append(table + ";");
    }
    if (sb.lastIndexOf(";") > 0) {
      sb.delete(sb.lastIndexOf(";"), sb.lastIndexOf(";") + 1);
    }
    sb.append(",targetRootDir=" + backupInfo.getTargetRootDir());

    return sb.toString();
  }
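
  // The method above yields a single-line summary such as (values hypothetical):
  //
  //   type=FULL,tablelist=t1;t2,targetRootDir=hdfs://namenode:8020/backup
  //
  // Note that only the trailing ";" of the table list is stripped.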

  /**
   * Clean up directories with prefix "_distcp_logs-", which are generated when DistCp copies
   * WALs.
   * @throws IOException exception
   */
  private static void cleanupDistCpLog(BackupInfo backupInfo, Configuration conf)
      throws IOException {
    Path rootPath = new Path(backupInfo.getHLogTargetDir()).getParent();
    FileSystem fs = FileSystem.get(rootPath.toUri(), conf);
    FileStatus[] files = FSUtils.listStatus(fs, rootPath);
    if (files == null) {
      return;
    }
    for (FileStatus file : files) {
      if (file.getPath().getName().startsWith("_distcp_logs")) {
        LOG.debug("Deleting log files of DistCp: " + file.getPath().getName());
        FSUtils.delete(fs, file.getPath(), true);
      }
    }
  }

  /**
   * Complete the overall backup.
   * @param backupInfo backup context
   * @throws IOException exception
   */
  static void completeBackup(final MasterProcedureEnv env, BackupInfo backupInfo,
      BackupManager backupManager, BackupType type, Configuration conf) throws IOException {
    // set the complete timestamp of the overall backup
    backupInfo.setEndTs(EnvironmentEdgeManager.currentTime());
    // set overall backup status: complete
    backupInfo.setState(BackupState.COMPLETE);
    backupInfo.setProgress(100);
    // add and store the manifest for the backup
    addManifest(backupInfo, backupManager, type, conf);

    // after the major steps are done and the manifest is persisted, do conversion if needed for
    // incremental backup
    /* in-flight convert code here, to be provided by a future JIRA */
    LOG.debug("in-flight convert code here, to be provided by a future JIRA");

    // compose the backup complete data
    String backupCompleteData =
        obtainBackupMetaDataStr(backupInfo) + ",startts=" + backupInfo.getStartTs()
            + ",completets=" + backupInfo.getEndTs() + ",bytescopied="
            + backupInfo.getTotalBytesCopied();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Backup " + backupInfo.getBackupId() + " finished: " + backupCompleteData);
    }
    backupManager.updateBackupInfo(backupInfo);

    // when full backup is done:
    // - delete HBase snapshots
    // - clean up directories with prefix "exportSnapshot-", which are generated when exporting
    //   snapshots
    if (type == BackupType.FULL) {
      deleteSnapshot(env, backupInfo, conf);
      cleanupExportSnapshotLog(conf);
    } else if (type == BackupType.INCREMENTAL) {
      cleanupDistCpLog(backupInfo, conf);
    }

    LOG.info("Backup " + backupInfo.getBackupId() + " completed.");
  }

  /**
   * Wrap a SnapshotDescription for a target table.
   * @param tableName table
   * @param snapshotName snapshot name
   * @return a SnapshotDescription especially for backup.
   */
  static SnapshotDescription wrapSnapshotDescription(TableName tableName, String snapshotName) {
    // Mock a SnapshotDescription from backupInfo to call SnapshotManager function,
    // Name it in the format "snapshot_<timestamp>_<table>"
    HBaseProtos.SnapshotDescription.Builder builder = HBaseProtos.SnapshotDescription.newBuilder();
    builder.setTable(tableName.getNameAsString());
    builder.setName(snapshotName);
    HBaseProtos.SnapshotDescription backupSnapshot = builder.build();

    LOG.debug("Wrapped a SnapshotDescription " + backupSnapshot.getName()
        + " from backupInfo to request snapshot for backup.");

    return backupSnapshot;
  }

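  // State flow driven by executeFromState() below:
  //   PRE_SNAPSHOT_TABLE -> SNAPSHOT_TABLES -> SNAPSHOT_COPY -> BACKUP_COMPLETE
  // An IOException in any state marks the procedure failed; a failure in the pre-snapshot or
  // snapshot-copy phase additionally fails the overall backup session via failBackup().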
  @Override
  protected Flow executeFromState(final MasterProcedureEnv env, final FullTableBackupState state) {
    if (conf == null) {
      conf = env.getMasterConfiguration();
    }
    if (backupManager == null) {
      try {
        backupManager = new BackupManager(env.getMasterConfiguration());
      } catch (IOException ioe) {
        setFailure("full backup", ioe);
        return Flow.NO_MORE_STATE;
      }
    }
    if (LOG.isTraceEnabled()) {
      LOG.trace(this + " execute state=" + state);
    }
    try {
      switch (state) {
        case PRE_SNAPSHOT_TABLE:
          beginBackup(backupManager, backupInfo);
          String savedStartCode = null;
          boolean firstBackup = false;
          // do snapshot for full table backup

          try {
            savedStartCode = backupManager.readBackupStartCode();
            firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) == 0L;
            if (firstBackup) {
              // This is our first backup. Let's put some marker on ZK so that we can hold the logs
              // while we do the backup.
              backupManager.writeBackupStartCode(0L);
            }
            // We roll the log here before we do the snapshot. It is possible there is duplicate
            // data in the log that is already in the snapshot. But if we do it after the
            // snapshot, we could have data loss.
            // A better approach is to do the roll log on each RS in the same global procedure as
            // the snapshot.
            LOG.info("Execute roll log procedure for full backup ...");
            MasterProcedureManager mpm = env.getMasterServices().getMasterProcedureManagerHost()
                .getProcedureManager(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE);
            Map<String, String> props = new HashMap<String, String>();
            props.put("backupRoot", backupInfo.getTargetRootDir());
            long waitTime = MasterProcedureUtil.execProcedure(mpm,
              LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
              LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
            MasterProcedureUtil.waitForProcedure(mpm,
              LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
              LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props, waitTime,
              conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
                HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER),
              conf.getLong(HConstants.HBASE_CLIENT_PAUSE,
                HConstants.DEFAULT_HBASE_CLIENT_PAUSE));

            newTimestamps = backupManager.readRegionServerLastLogRollResult();
            if (firstBackup) {
              // Updates registered log files
              // We record ALL old WAL files as registered, because
              // this is the first full backup in the system and these
              // files are not needed for the next incremental backup
              List<String> logFiles = BackupServerUtil.getWALFilesOlderThan(conf, newTimestamps);
              backupManager.recordWALFiles(logFiles);
            }
          } catch (BackupException e) {
            setFailure("Failure in full-backup: pre-snapshot phase", e);
            // fail the overall backup and return
            failBackup(env, backupInfo, backupManager, e, "Unexpected BackupException : ",
              BackupType.FULL, conf);
            return Flow.NO_MORE_STATE;
          }
          setNextState(FullTableBackupState.SNAPSHOT_TABLES);
          break;
        case SNAPSHOT_TABLES:
          for (TableName tableName : tableList) {
            String snapshotName = "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime())
                + "_" + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString();
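            // Illustrative result: for table "default:usertable" the name is of the form
            // "snapshot_1475096550000_default_usertable" (timestamp hypothetical).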
            HBaseProtos.SnapshotDescription backupSnapshot;

            // wrap a SnapshotDescription for offline/online snapshot
            backupSnapshot = wrapSnapshotDescription(tableName, snapshotName);
            try {
              env.getMasterServices().getSnapshotManager().deleteSnapshot(backupSnapshot);
            } catch (IOException e) {
              LOG.debug("Unable to delete " + snapshotName, e);
            }
            // Kick off snapshot for backup
            snapshotTable(env, backupSnapshot);
            backupInfo.setSnapshotName(tableName, backupSnapshot.getName());
          }
          setNextState(FullTableBackupState.SNAPSHOT_COPY);
          break;
        case SNAPSHOT_COPY:
          // do snapshot copy
          LOG.debug("snapshot copy for " + backupId);
          try {
            this.snapshotCopy(backupInfo);
          } catch (Exception e) {
            setFailure("Failure in full-backup: snapshot copy phase " + backupId, e);
            // fail the overall backup and return
            failBackup(env, backupInfo, backupManager, e, "Unexpected BackupException : ",
              BackupType.FULL, conf);
            return Flow.NO_MORE_STATE;
          }
          // Updates incremental backup table set
          backupManager.addIncrementalBackupTableSet(backupInfo.getTables());
          setNextState(FullTableBackupState.BACKUP_COMPLETE);
          break;

        case BACKUP_COMPLETE:
          // set overall backup status: complete. Here we make sure to complete the backup.
          // After this checkpoint, even if the cancel process kicks in, the backup will be
          // allowed to finish.
          backupInfo.setState(BackupState.COMPLETE);
          // The table list in backupInfo is good for both full backup and incremental backup.
          // For incremental backup, it contains the incremental backup table set.
          backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);

          HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
              backupManager.readLogTimestampMap();

          Long newStartCode = BackupClientUtil.getMinValue(
              BackupServerUtil.getRSLogTimestampMins(newTableSetTimestampMap));
          backupManager.writeBackupStartCode(newStartCode);

          // backup complete
          completeBackup(env, backupInfo, backupManager, BackupType.FULL, conf);
          return Flow.NO_MORE_STATE;

        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      LOG.error("Backup failed in " + state, e);
      setFailure("snapshot-table", e);
    }
    return Flow.HAS_MORE_STATE;
  }

  private void snapshotTable(final MasterProcedureEnv env, SnapshotDescription backupSnapshot)
      throws IOException {
    int maxAttempts = env.getMasterConfiguration().getInt(SNAPSHOT_BACKUP_MAX_ATTEMPTS_KEY,
        DEFAULT_SNAPSHOT_BACKUP_MAX_ATTEMPTS);
    int delay = env.getMasterConfiguration().getInt(SNAPSHOT_BACKUP_ATTEMPTS_DELAY_KEY,
        DEFAULT_SNAPSHOT_BACKUP_ATTEMPTS_DELAY);
    int attempts = 0;

    while (attempts++ < maxAttempts) {
      try {
        env.getMasterServices().getSnapshotManager().takeSnapshot(backupSnapshot);
        long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(
            env.getMasterConfiguration(),
            backupSnapshot.getType(), SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
        BackupServerUtil.waitForSnapshot(backupSnapshot, waitTime,
            env.getMasterServices().getSnapshotManager(), env.getMasterConfiguration());
        return;
      } catch (Exception ee) {
        LOG.warn("Snapshot attempt " + attempts + " failed for table "
            + backupSnapshot.getTable() + ", sleeping for " + delay + "ms", ee);
        if (attempts < maxAttempts) {
          try {
            Thread.sleep(delay);
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
          }
        }
      }
    }
    // All attempts failed: fail the procedure instead of returning as if the snapshot succeeded
    throw new IOException("Failed to snapshot table " + backupSnapshot.getTable()
        + " after " + maxAttempts + " attempts");
  }
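
  // With the default settings above (10 attempts, 10000 ms delay) a persistently failing
  // snapshot sleeps between attempts for roughly 9 * 10 s = 90 s in total before the method
  // gives up and throws.
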
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final FullTableBackupState state)
      throws IOException {
    if (state != FullTableBackupState.PRE_SNAPSHOT_TABLE) {
      deleteSnapshot(env, backupInfo, conf);
      cleanupExportSnapshotLog(conf);
    }

    // clean up the uncompleted data at the target directory if the ongoing backup has already
    // entered the copy phase
    // For incremental backup, DistCp logs will be cleaned with the targetDir.
    if (state == FullTableBackupState.SNAPSHOT_COPY) {
      cleanupTargetDir(backupInfo, conf);
    }
  }

  @Override
  protected FullTableBackupState getState(final int stateId) {
    return FullTableBackupState.valueOf(stateId);
  }

  @Override
  protected int getStateId(final FullTableBackupState state) {
    return state.getNumber();
  }

  @Override
  protected FullTableBackupState getInitialState() {
    return FullTableBackupState.PRE_SNAPSHOT_TABLE;
  }

  @Override
  protected void setNextState(final FullTableBackupState state) {
    if (aborted.get()) {
      setAbortFailure("backup-table", "abort requested");
    } else {
      super.setNextState(state);
    }
  }

  @Override
  public boolean abort(final MasterProcedureEnv env) {
    aborted.set(true);
    return true;
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" (targetRootDir=");
    sb.append(targetRootDir);
    sb.append("; backupId=").append(backupId);
    sb.append("; tables=");
    int len = tableList.size();
    for (int i = 0; i < len - 1; i++) {
      sb.append(tableList.get(i)).append(",");
    }
    if (len >= 1) {
      sb.append(tableList.get(len - 1));
    }
    sb.append(")");
  }

  BackupProtos.BackupProcContext toBackupInfo() {
    BackupProtos.BackupProcContext.Builder ctxBuilder = BackupProtos.BackupProcContext.newBuilder();
    ctxBuilder.setCtx(backupInfo.toProtosBackupInfo());
    if (newTimestamps != null && !newTimestamps.isEmpty()) {
      BackupProtos.ServerTimestamp.Builder tsBuilder = ServerTimestamp.newBuilder();
      for (Entry<String, Long> entry : newTimestamps.entrySet()) {
        tsBuilder.clear().setServer(entry.getKey()).setTimestamp(entry.getValue());
        ctxBuilder.addServerTimestamp(tsBuilder.build());
      }
    }
    return ctxBuilder.build();
  }

  @Override
  public void serializeStateData(final OutputStream stream) throws IOException {
    super.serializeStateData(stream);

    BackupProtos.BackupProcContext backupProcCtx = toBackupInfo();
    backupProcCtx.writeDelimitedTo(stream);
  }
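
  // writeDelimitedTo() above is paired with parseDelimitedFrom() in deserializeStateData(),
  // so the BackupProcContext message round-trips across procedure restarts.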

  @Override
  public void deserializeStateData(final InputStream stream) throws IOException {
    super.deserializeStateData(stream);

    BackupProtos.BackupProcContext proto =
        BackupProtos.BackupProcContext.parseDelimitedFrom(stream);
    backupInfo = BackupInfo.fromProto(proto.getCtx());
    backupId = backupInfo.getBackupId();
    targetRootDir = backupInfo.getTargetRootDir();
    tableList = backupInfo.getTableNames();
    List<ServerTimestamp> svrTimestamps = proto.getServerTimestampList();
    if (svrTimestamps != null && !svrTimestamps.isEmpty()) {
      newTimestamps = new HashMap<>();
      for (ServerTimestamp ts : svrTimestamps) {
        newTimestamps.put(ts.getServer(), ts.getTimestamp());
      }
    }
  }

  @Override
  public TableName getTableName() {
    return TableName.BACKUP_TABLE_NAME;
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.BACKUP;
  }

  @Override
  protected boolean acquireLock(final MasterProcedureEnv env) {
    if (!env.isInitialized() && !getTableName().isSystemTable()) {
      return false;
    }
    return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "full backup");
    /*
    if (env.waitInitialized(this)) {
      return false;
    }
    return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, TableName.BACKUP_TABLE_NAME);
    */
  }

  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureQueue().releaseTableWrite(getTableName());
  }
}