
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.backup.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable.WALItem;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.util.BackupClientUtil;
import org.apache.hadoop.hbase.backup.util.BackupServerUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.DefaultWALProvider;

/**
 * After a full backup is created, an incremental backup stores only the changes made since the
 * last full or incremental backup.
 * <p>
 * Creating the backup copies the log files in .logs and .oldlogs written since the last backup
 * timestamp.
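 * <p>
 * A minimal usage sketch ({@code backupManager} and {@code backupContext} are assumed to be an
 * already initialized {@link BackupManager} and {@link BackupInfo}):
 * <pre>
 * IncrementalBackupManager incrMgr = new IncrementalBackupManager(backupManager);
 * // rolls region server WALs, then computes and stores the WAL list in backupContext
 * HashMap&lt;String, Long&gt; newTimestamps = incrMgr.getIncrBackupLogFileList(backupContext);
 * </pre>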
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class IncrementalBackupManager {
  public static final Log LOG = LogFactory.getLog(IncrementalBackupManager.class);

  // parent manager
  private final BackupManager backupManager;
  private final Configuration conf;
  private final Connection conn;

  public IncrementalBackupManager(BackupManager bm) {
    this.backupManager = bm;
    this.conf = bm.getConf();
    this.conn = bm.getConnection();
  }

  /**
   * Obtain the list of logs that need to be copied out for this incremental backup. The list is
   * set in BackupContext.
   * @param backupContext backup context
   * @return The new HashMap of RS log timestamps after the log roll for this incremental backup.
   * @throws IOException exception
   */
  public HashMap<String, Long> getIncrBackupLogFileList(BackupInfo backupContext)
      throws IOException {
    List<String> logList;
    HashMap<String, Long> newTimestamps;
    HashMap<String, Long> previousTimestampMins;

    String savedStartCode = backupManager.readBackupStartCode();

    // key: tableName
    // value: <RegionServer,PreviousTimeStamp>
    HashMap<TableName, HashMap<String, Long>> previousTimestampMap =
        backupManager.readLogTimestampMap();

    previousTimestampMins = BackupServerUtil.getRSLogTimestampMins(previousTimestampMap);

    if (LOG.isDebugEnabled()) {
      LOG.debug("StartCode " + savedStartCode + " for backupID " + backupContext.getBackupId());
    }
    // get all new log files from .logs and .oldlogs after last TS and before new timestamp
    if (savedStartCode == null || previousTimestampMins == null
        || previousTimestampMins.isEmpty()) {
      throw new IOException("Cannot read any previous backup timestamps from hbase:backup. "
          + "In order to create an incremental backup, at least one full backup is needed.");
    }

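    // Roll the WALs on all region servers so this backup has a well-defined upper boundary;
    // edits written after the roll will be captured by the next incremental backup.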
    try (Admin admin = conn.getAdmin()) {
      LOG.info("Execute roll log procedure for incremental backup ...");
      HashMap<String, String> props = new HashMap<String, String>();
      props.put("backupRoot", backupContext.getTargetRootDir());
      admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
        LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
    }

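    // Read, per region server, the timestamp recorded by the log roll procedure;
    // these timestamps bound this incremental backup from above.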
    newTimestamps = backupManager.readRegionServerLastLogRollResult();

    logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf, savedStartCode);
    List<WALItem> logFromSystemTable =
        getLogFilesFromBackupSystem(previousTimestampMins, newTimestamps,
          backupManager.getBackupContext().getTargetRootDir());
    addLogsFromBackupSystemToContext(logFromSystemTable);

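    // WALs that earlier backups have already copied must not be copied again.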
    logList = excludeAlreadyBackedUpWALs(logList, logFromSystemTable);
    backupContext.setIncrBackupFileList(logList);

    return newTimestamps;
  }

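  /**
   * Removes WALs that previous backups have already copied from the candidate list.
   * @param logList candidate WAL files found on the file system
   * @param logFromSystemTable WALs recorded in hbase:backup by earlier backups
   * @return the filtered list
   */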
  private List<String> excludeAlreadyBackedUpWALs(List<String> logList,
      List<WALItem> logFromSystemTable) {
    List<String> backedUpWALList = toWALList(logFromSystemTable);
    logList.removeAll(backedUpWALList);
    return logList;
  }

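  /**
   * Extracts the WAL file paths from a list of {@link WALItem}s.
   */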
  private List<String> toWALList(List<WALItem> logFromSystemTable) {
    List<String> list = new ArrayList<String>(logFromSystemTable.size());
    for (WALItem item : logFromSystemTable) {
      list.add(item.getWalFile());
    }
    return list;
  }

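  /**
   * Builds backupId-relative paths ({@code backupId/walFileName}) for WALs recorded in
   * hbase:backup. Note that in this version the resulting list is built but not yet attached to
   * the backup context.
   */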
  private void addLogsFromBackupSystemToContext(List<WALItem> logFromSystemTable) {
    List<String> walFiles = new ArrayList<String>();
    for (WALItem item : logFromSystemTable) {
      Path p = new Path(item.getWalFile());
      String walFileName = p.getName();
      String backupId = item.getBackupId();
      String relWALPath = backupId + Path.SEPARATOR + walFileName;
      walFiles.add(relWALPath);
    }
  }

  /**
   * For each region server: get all log files newer than the last timestamps but not newer than
   * the newest timestamps, from the hbase:backup table.
   * @param olderTimestamps the timestamp for each region server of the last backup.
   * @param newestTimestamps the timestamp for each region server that the backup should lead to.
   * @param backupRoot the backup root directory these WALs must belong to
   * @return list of log files which need to be added to this backup
   * @throws IOException exception
   */
  private List<WALItem> getLogFilesFromBackupSystem(HashMap<String, Long> olderTimestamps,
      HashMap<String, Long> newestTimestamps, String backupRoot) throws IOException {
    List<WALItem> logFiles = new ArrayList<WALItem>();
    Iterator<WALItem> it = backupManager.getWALFilesFromBackupSystem();
    while (it.hasNext()) {
      WALItem item = it.next();
      String rootDir = item.getBackupRoot();
      if (!rootDir.equals(backupRoot)) {
        continue;
      }
      String walFileName = item.getWalFile();
      String server = BackupServerUtil.parseHostNameFromLogFile(new Path(walFileName));
      if (server == null) {
        continue;
      }
      Long tss = getTimestamp(walFileName);
      Long oldTss = olderTimestamps.get(server);
      Long newTss = newestTimestamps.get(server);
      // no timestamp for this server from the last backup: include its WAL
      if (oldTss == null) {
        logFiles.add(item);
        continue;
      }
      // no new roll result for this server: no upper boundary
      if (newTss == null) {
        newTss = Long.MAX_VALUE;
      }
      if (tss > oldTss && tss < newTss) {
        logFiles.add(item);
      }
    }
    return logFiles;
  }

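  /**
   * Extracts the creation timestamp from a WAL file name: the suffix after the last
   * {@code BackupServerUtil.LOGNAME_SEPARATOR}.
   */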
  private Long getTimestamp(String walFileName) {
    int index = walFileName.lastIndexOf(BackupServerUtil.LOGNAME_SEPARATOR);
    return Long.parseLong(walFileName.substring(index + 1));
  }

  /**
   * For each region server: get all log files newer than the last timestamps but not newer than
   * the newest timestamps.
   * @param olderTimestamps the timestamp for each region server of the last backup.
   * @param newestTimestamps the timestamp for each region server that the backup should lead to.
   * @param conf the Hadoop and HBase configuration
   * @param savedStartCode the startcode (timestamp) of last successful backup.
   * @return a list of log files to be backed up
   * @throws IOException exception
   */
  private List<String> getLogFilesForNewBackup(HashMap<String, Long> olderTimestamps,
      HashMap<String, Long> newestTimestamps, Configuration conf, String savedStartCode)
      throws IOException {
    LOG.debug("In getLogFilesForNewBackup()\n" + "olderTimestamps: " + olderTimestamps
        + "\n newestTimestamps: " + newestTimestamps);
    Path rootdir = FSUtils.getRootDir(conf);
    Path logDir = new Path(rootdir, HConstants.HREGION_LOGDIR_NAME);
    Path oldLogDir = new Path(rootdir, HConstants.HREGION_OLDLOGDIR_NAME);
    FileSystem fs = rootdir.getFileSystem(conf);
    NewestLogFilter pathFilter = new NewestLogFilter();

    List<String> resultLogFiles = new ArrayList<String>();
    List<String> newestLogs = new ArrayList<String>();

    /*
     * The old region servers and timestamps info we kept in hbase:backup may be out of sync if a
     * new region server is added or an existing one is lost. We deal with it here when processing
     * the logs. If data in hbase:backup has more hosts, just ignore it. If the .logs directory
     * includes more hosts, the additional hosts will not have old timestamps to compare with.
     * We just use all the logs in that directory. We always write up-to-date region server and
     * timestamp info to hbase:backup at the end of a successful backup.
     */

    FileStatus[] rss;
    Path p;
    String host;
    Long oldTimeStamp;
    String currentLogFile;
    Long currentLogTS;

    // Get the files in .logs.
    rss = fs.listStatus(logDir);
    for (FileStatus rs : rss) {
      p = rs.getPath();
      host = BackupServerUtil.parseHostNameFromLogFile(p);
      if (host == null) {
        LOG.warn("Skipping " + p + ": not a valid log file");
        continue;
      }
      FileStatus[] logs;
      oldTimeStamp = olderTimestamps.get(host);
      // It is possible that there is no old timestamp in hbase:backup for this host if
      // this region server is newly added after our last backup.
      if (oldTimeStamp == null) {
        logs = fs.listStatus(p);
      } else {
        pathFilter.setLastBackupTS(oldTimeStamp);
        logs = fs.listStatus(p, pathFilter);
      }
      for (FileStatus log : logs) {
        LOG.debug("currentLogFile: " + log.getPath().toString());
        if (DefaultWALProvider.isMetaFile(log.getPath())) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Skip hbase:meta log file: " + log.getPath().getName());
          }
          continue;
        }
        currentLogFile = log.getPath().toString();
        resultLogFiles.add(currentLogFile);
        currentLogTS = BackupClientUtil.getCreationTime(log.getPath());
        // newestTimestamps is up-to-date with the current list of hosts
        // so newestTimestamps.get(host) will not be null.
        if (currentLogTS > newestTimestamps.get(host)) {
          newestLogs.add(currentLogFile);
        }
      }
    }

    // Include the .oldlogs files too.
    FileStatus[] oldlogs = fs.listStatus(oldLogDir);
    for (FileStatus oldlog : oldlogs) {
      p = oldlog.getPath();
      currentLogFile = p.toString();
      if (DefaultWALProvider.isMetaFile(p)) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Skip .meta log file: " + currentLogFile);
        }
        continue;
      }
      host = BackupClientUtil.parseHostFromOldLog(p);
      if (host == null) {
        LOG.debug("Skip file: " + currentLogFile);
        continue;
      }
      currentLogTS = BackupClientUtil.getCreationTime(p);
      oldTimeStamp = olderTimestamps.get(host);
      /*
       * It is possible that there is no old timestamp in hbase:backup for this host. At the time
       * of our last backup operation, this region server did not exist. The reason can be one of
       * two: 1. The region server already left or crashed, and its logs were moved to .oldlogs.
       * 2. The region server was added after our last backup.
       */
      if (oldTimeStamp == null) {
        if (currentLogTS < Long.valueOf(savedStartCode)) {
          // This log file is really old; its region server was gone before our last backup.
          continue;
        } else {
          resultLogFiles.add(currentLogFile);
        }
      } else if (currentLogTS > oldTimeStamp) {
        resultLogFiles.add(currentLogFile);
      }

      // It is possible that a host in .oldlogs is an obsolete region server,
      // so newestTimestamps.get(host) here can be null.
      // Even if these logs belong to an obsolete region server, we still need
      // to include them to avoid losing edits in the backup.
      Long newTimestamp = newestTimestamps.get(host);
      if (newTimestamp != null && currentLogTS > newTimestamp) {
        newestLogs.add(currentLogFile);
      }
    }
    // Remove the newest log per host because it is still in use.
    resultLogFiles.removeAll(newestLogs);
    return resultLogFiles;
  }

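  /**
   * A {@link PathFilter} that accepts only non-meta WAL files created strictly after the last
   * backup timestamp.
   */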
  class NewestLogFilter implements PathFilter {
    private Long lastBackupTS = 0L;

    public NewestLogFilter() {
    }

    protected void setLastBackupTS(Long ts) {
      this.lastBackupTS = ts;
    }

    @Override
    public boolean accept(Path path) {
      // skip meta table log -- ts.meta file
      if (DefaultWALProvider.isMetaFile(path)) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Skip .meta log file: " + path.getName());
        }
        return false;
      }
      Long timestamp = null;
      try {
        timestamp = BackupClientUtil.getCreationTime(path);
        return timestamp > lastBackupTS;
      } catch (Exception e) {
        LOG.warn("Cannot read timestamp of log file " + path + ", skipping it");
        return false;
      }
    }
  }

}