View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.io.Closeable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.io.PrintWriter;
25  import java.io.StringWriter;
26  import java.net.InetAddress;
27  import java.net.URI;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collection;
31  import java.util.Collections;
32  import java.util.Comparator;
33  import java.util.HashMap;
34  import java.util.HashSet;
35  import java.util.Iterator;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Map.Entry;
39  import java.util.Set;
40  import java.util.SortedMap;
41  import java.util.SortedSet;
42  import java.util.TreeMap;
43  import java.util.TreeSet;
44  import java.util.Vector;
45  import java.util.concurrent.Callable;
46  import java.util.concurrent.ConcurrentSkipListMap;
47  import java.util.concurrent.ExecutionException;
48  import java.util.concurrent.ExecutorService;
49  import java.util.concurrent.Executors;
50  import java.util.concurrent.Future;
51  import java.util.concurrent.FutureTask;
52  import java.util.concurrent.ScheduledThreadPoolExecutor;
53  import java.util.concurrent.TimeUnit;
54  import java.util.concurrent.TimeoutException;
55  import java.util.concurrent.atomic.AtomicBoolean;
56  import java.util.concurrent.atomic.AtomicInteger;
57  
58  import com.google.common.base.Joiner;
59  import com.google.common.base.Preconditions;
60  import com.google.common.collect.ImmutableList;
61  import com.google.common.collect.Lists;
62  import com.google.common.collect.Multimap;
63  import com.google.common.collect.Ordering;
64  import com.google.common.collect.TreeMultimap;
65  import com.google.protobuf.ServiceException;
66  
67  import org.apache.commons.lang.StringUtils;
68  import org.apache.commons.logging.Log;
69  import org.apache.commons.logging.LogFactory;
70  import org.apache.hadoop.hbase.classification.InterfaceAudience;
71  import org.apache.hadoop.hbase.classification.InterfaceStability;
72  import org.apache.hadoop.conf.Configuration;
73  import org.apache.hadoop.conf.Configured;
74  import org.apache.hadoop.fs.FSDataOutputStream;
75  import org.apache.hadoop.fs.FileStatus;
76  import org.apache.hadoop.fs.FileSystem;
77  import org.apache.hadoop.fs.Path;
78  import org.apache.hadoop.fs.permission.FsAction;
79  import org.apache.hadoop.fs.permission.FsPermission;
80  import org.apache.hadoop.hbase.Abortable;
81  import org.apache.hadoop.hbase.Cell;
82  import org.apache.hadoop.hbase.ClusterStatus;
83  import org.apache.hadoop.hbase.CoordinatedStateException;
84  import org.apache.hadoop.hbase.HBaseConfiguration;
85  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
86  import org.apache.hadoop.hbase.HColumnDescriptor;
87  import org.apache.hadoop.hbase.HConstants;
88  import org.apache.hadoop.hbase.HRegionInfo;
89  import org.apache.hadoop.hbase.HRegionLocation;
90  import org.apache.hadoop.hbase.HTableDescriptor;
91  import org.apache.hadoop.hbase.KeyValue;
92  import org.apache.hadoop.hbase.MasterNotRunningException;
93  import org.apache.hadoop.hbase.RegionLocations;
94  import org.apache.hadoop.hbase.ServerName;
95  import org.apache.hadoop.hbase.TableName;
96  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
97  import org.apache.hadoop.hbase.MetaTableAccessor;
98  import org.apache.hadoop.hbase.classification.InterfaceAudience;
99  import org.apache.hadoop.hbase.classification.InterfaceStability;
100 import org.apache.hadoop.hbase.client.Admin;
101 import org.apache.hadoop.hbase.client.ClusterConnection;
102 import org.apache.hadoop.hbase.client.ConnectionFactory;
103 import org.apache.hadoop.hbase.client.Delete;
104 import org.apache.hadoop.hbase.client.Get;
105 import org.apache.hadoop.hbase.client.HBaseAdmin;
106 import org.apache.hadoop.hbase.client.HConnectable;
107 import org.apache.hadoop.hbase.client.HConnection;
108 import org.apache.hadoop.hbase.client.HConnectionManager;
109 import org.apache.hadoop.hbase.client.MetaScanner;
110 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
111 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
112 import org.apache.hadoop.hbase.client.Put;
113 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
114 import org.apache.hadoop.hbase.client.Result;
115 import org.apache.hadoop.hbase.client.RowMutations;
116 import org.apache.hadoop.hbase.client.Table;
117 import org.apache.hadoop.hbase.io.FileLink;
118 import org.apache.hadoop.hbase.io.HFileLink;
119 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
120 import org.apache.hadoop.hbase.io.hfile.HFile;
121 import org.apache.hadoop.hbase.master.MasterFileSystem;
122 import org.apache.hadoop.hbase.master.RegionState;
123 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
124 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
125 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
126 import org.apache.hadoop.hbase.regionserver.HRegion;
127 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
128 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
129 import org.apache.hadoop.hbase.security.AccessDeniedException;
130 import org.apache.hadoop.hbase.security.UserProvider;
131 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
132 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
133 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
134 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
135 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
136 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
137 import org.apache.hadoop.hbase.wal.WALSplitter;
138 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
139 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
140 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
141 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
142 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
143 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
144 import org.apache.hadoop.io.IOUtils;
145 import org.apache.hadoop.ipc.RemoteException;
146 import org.apache.hadoop.security.UserGroupInformation;
147 import org.apache.hadoop.util.ReflectionUtils;
148 import org.apache.hadoop.util.Tool;
149 import org.apache.hadoop.util.ToolRunner;
150 import org.apache.zookeeper.KeeperException;
151 
152 /**
153  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
154  * table integrity problems in a corrupted HBase.
155  * <p>
156  * Region consistency checks verify that hbase:meta, region deployment on region
157  * servers and the state of data in HDFS (.regioninfo files) all are in
158  * accordance.
159  * <p>
160  * Table integrity checks verify that all possible row keys resolve to exactly
161  * one region of a table.  This means there are no individual degenerate
162  * or backwards regions; no holes between regions; and that there are no
163  * overlapping regions.
164  * <p>
165  * The general repair strategy works in two phases:
166  * <ol>
167  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
168  * <li> Repair Region Consistency with hbase:meta and assignments
169  * </ol>
170  * <p>
171  * For table integrity repairs, the tables' region directories are scanned
172  * for .regioninfo files.  Each table's integrity is then verified.  If there
173  * are any orphan regions (regions with no .regioninfo files) or holes, new
174  * regions are fabricated.  Backwards regions are sidelined as well as empty
175  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
176  * a new region is created and all data is merged into the new region.
177  * <p>
178  * Table integrity repairs deal solely with HDFS and could potentially be done
179  * offline -- the hbase region servers or master do not need to be running.
180  * This phase can eventually be used to completely reconstruct the hbase:meta table in
181  * an offline fashion.
182  * <p>
183  * Region consistency requires three conditions -- 1) valid .regioninfo file
184  * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
185  * and 3) a region is deployed only at the regionserver that was assigned to
186  * with proper state in the master.
187  * <p>
188  * Region consistency repairs require hbase to be online so that hbck can
189  * contact the HBase master and region servers.  The hbck#connect() method must
190  * first be called successfully.  Much of the region consistency information
191  * is transient and less risky to repair.
192  * <p>
193  * If hbck is run from the command line, there are a handful of arguments that
194  * can be used to limit the kinds of repairs hbck will do.  See the code in
195  * {@link #printUsageAndExit()} for more details.
196  */
197 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
198 @InterfaceStability.Evolving
199 public class HBaseFsck extends Configured implements Closeable {
200   public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
201   public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
202   private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
203   private static boolean rsSupportsOffline = true;
204   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
205   private static final int DEFAULT_MAX_MERGE = 5;
206   private static final String TO_BE_LOADED = "to_be_loaded";
207   private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
208   private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
209   private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
210   private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
211   // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
212   // In HADOOP-2.6 and later, the Namenode proxy now created with custom RetryPolicy for
213   // AlreadyBeingCreatedException which is implies timeout on this operations up to
214   // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
215   private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
216   private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
217   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
218   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
219 
220   /**********************
221    * Internal resources
222    **********************/
223   private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
224   private ClusterStatus status;
225   private ClusterConnection connection;
226   private Admin admin;
227   private Table meta;
228   // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions
229   protected ExecutorService executor;
230   private long startMillis = EnvironmentEdgeManager.currentTime();
231   private HFileCorruptionChecker hfcc;
232   private int retcode = 0;
233   private Path HBCK_LOCK_PATH;
234   private FSDataOutputStream hbckOutFd;
235   // This lock is to prevent cleanup of balancer resources twice between
236   // ShutdownHook and the main code. We cleanup only if the connect() is
237   // successful
238   private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);
239 
240   /***********
241    * Options
242    ***********/
243   private static boolean details = false; // do we display the full report
244   private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
245   private static boolean disableSplitAndMerge = false; // disable split and merge
246   private boolean fixAssignments = false; // fix assignment errors?
247   private boolean fixMeta = false; // fix meta errors?
248   private boolean checkHdfs = true; // load and check fs consistency?
249   private boolean fixHdfsHoles = false; // fix fs holes?
250   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
251   private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
252   private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
253   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
254   private boolean fixSplitParents = false; // fix lingering split parents
255   private boolean fixReferenceFiles = false; // fix lingering reference store file
256   private boolean fixHFileLinks = false; // fix lingering HFileLinks
257   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
258   private boolean fixTableLocks = false; // fix table locks which are expired
259   private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
260   private boolean fixAny = false; // Set to true if any of the fix is required.
261 
262   // limit checking/fixes to listed tables, if empty attempt to check/fix all
263   // hbase:meta are always checked
264   private Set<TableName> tablesIncluded = new HashSet<TableName>();
265   private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
266   private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
267   private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
268   private Path sidelineDir = null;
269 
270   private boolean rerun = false; // if we tried to fix something, rerun hbck
271   private static boolean summary = false; // if we want to print less output
272   private boolean checkMetaOnly = false;
273   private boolean checkRegionBoundaries = false;
274   private boolean ignorePreCheckPermission = false; // if pre-check permission
275 
276   /*********
277    * State
278    *********/
279   final private ErrorReporter errors;
280   int fixes = 0;
281 
282   /**
283    * This map contains the state of all hbck items.  It maps from encoded region
284    * name to HbckInfo structure.  The information contained in HbckInfo is used
285    * to detect and correct consistency (hdfs/meta/deployment) problems.
286    */
287   private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
288   private TreeSet<TableName> disabledTables =
289     new TreeSet<TableName>();
290   // Empty regioninfo qualifiers in hbase:meta
291   private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();
292 
293   /**
294    * This map from Tablename -> TableInfo contains the structures necessary to
295    * detect table consistency problems (holes, dupes, overlaps).  It is sorted
296    * to prevent dupes.
297    *
298    * If tablesIncluded is empty, this map contains all tables.
299    * Otherwise, it contains only meta tables and tables in tablesIncluded,
300    * unless checkMetaOnly is specified, in which case, it contains only
301    * the meta table
302    */
303   private SortedMap<TableName, TableInfo> tablesInfo =
304       new ConcurrentSkipListMap<TableName, TableInfo>();
305 
306   /**
307    * When initially looking at HDFS, we attempt to find any orphaned data.
308    */
309   private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
310 
311   private Map<TableName, Set<String>> orphanTableDirs =
312       new HashMap<TableName, Set<String>>();
313 
314   private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();
315 
316   /**
317    * List of orphaned table ZNodes
318    */
319   private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
320   private final RetryCounterFactory lockFileRetryCounterFactory;
321   private final RetryCounterFactory createZNodeRetryCounterFactory;
322 
323   private ZooKeeperWatcher zkw = null;
324   private String hbckEphemeralNodePath = null;
325   private boolean hbckZodeCreated = false;
326 
327   /**
328    * Constructor
329    *
330    * @param conf Configuration object
331    * @throws MasterNotRunningException if the master is not running
332    * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
333    */
334   public HBaseFsck(Configuration conf) throws MasterNotRunningException,
335       ZooKeeperConnectionException, IOException, ClassNotFoundException {
336     this(conf, createThreadPool(conf));
337   }
338 
339   private static ExecutorService createThreadPool(Configuration conf) {
340     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
341     return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
342   }
343 
344   /**
345    * Constructor
346    *
347    * @param conf
348    *          Configuration object
349    * @throws MasterNotRunningException
350    *           if the master is not running
351    * @throws ZooKeeperConnectionException
352    *           if unable to connect to ZooKeeper
353    */
354   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
355       ZooKeeperConnectionException, IOException, ClassNotFoundException {
356     super(conf);
357     errors = getErrorReporter(getConf());
358     this.executor = exec;
359     lockFileRetryCounterFactory = new RetryCounterFactory(
360       getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
361       getConf().getInt(
362         "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
363       getConf().getInt(
364         "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
365     createZNodeRetryCounterFactory = new RetryCounterFactory(
366       getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
367       getConf().getInt(
368         "hbase.hbck.createznode.attempt.sleep.interval",
369         DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
370       getConf().getInt(
371         "hbase.hbck.createznode.attempt.maxsleeptime",
372         DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
373     zkw = createZooKeeperWatcher();
374   }
375 
376   private class FileLockCallable implements Callable<FSDataOutputStream> {
377     RetryCounter retryCounter;
378 
379     public FileLockCallable(RetryCounter retryCounter) {
380       this.retryCounter = retryCounter;
381     }
382     @Override
383     public FSDataOutputStream call() throws IOException {
384       try {
385         FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
386         FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
387             HConstants.DATA_FILE_UMASK_KEY);
388         Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
389         fs.mkdirs(tmpDir);
390         HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
391         final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
392         out.writeBytes(InetAddress.getLocalHost().toString());
393         out.flush();
394         return out;
395       } catch(RemoteException e) {
396         if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
397           return null;
398         } else {
399           throw e;
400         }
401       }
402     }
403 
404     private FSDataOutputStream createFileWithRetries(final FileSystem fs,
405         final Path hbckLockFilePath, final FsPermission defaultPerms)
406         throws IOException {
407 
408       IOException exception = null;
409       do {
410         try {
411           return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
412         } catch (IOException ioe) {
413           LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
414               + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
415               + retryCounter.getMaxAttempts());
416           LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), 
417               ioe);
418           try {
419             exception = ioe;
420             retryCounter.sleepUntilNextRetry();
421           } catch (InterruptedException ie) {
422             throw (InterruptedIOException) new InterruptedIOException(
423                 "Can't create lock file " + hbckLockFilePath.getName())
424             .initCause(ie);
425           }
426         }
427       } while (retryCounter.shouldRetry());
428 
429       throw exception;
430     }
431   }
432 
433   /**
434    * This method maintains a lock using a file. If the creation fails we return null
435    *
436    * @return FSDataOutputStream object corresponding to the newly opened lock file
437    * @throws IOException
438    */
439   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
440     RetryCounter retryCounter = lockFileRetryCounterFactory.create();
441     FileLockCallable callable = new FileLockCallable(retryCounter);
442     ExecutorService executor = Executors.newFixedThreadPool(1);
443     FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
444     executor.execute(futureTask);
445     final int timeoutInSeconds = getConf().getInt(
446       "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
447     FSDataOutputStream stream = null;
448     try {
449       stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
450     } catch (ExecutionException ee) {
451       LOG.warn("Encountered exception when opening lock file", ee);
452     } catch (InterruptedException ie) {
453       LOG.warn("Interrupted when opening lock file", ie);
454       Thread.currentThread().interrupt();
455     } catch (TimeoutException exception) {
456       // took too long to obtain lock
457       LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
458       futureTask.cancel(true);
459     } finally {
460       executor.shutdownNow();
461     }
462     return stream;
463   }
464 
465   private void unlockHbck() {
466     if (hbckLockCleanup.compareAndSet(true, false)) {
467       RetryCounter retryCounter = lockFileRetryCounterFactory.create();
468       do {
469         try {
470           IOUtils.closeStream(hbckOutFd);
471           FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
472               HBCK_LOCK_PATH, true);
473           LOG.info("Finishing hbck");
474           return;
475         } catch (IOException ioe) {
476           LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
477               + (retryCounter.getAttemptTimes() + 1) + " of "
478               + retryCounter.getMaxAttempts());
479           LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
480           try {
481             retryCounter.sleepUntilNextRetry();
482           } catch (InterruptedException ie) {
483             Thread.currentThread().interrupt();
484             LOG.warn("Interrupted while deleting lock file" +
485                 HBCK_LOCK_PATH);
486             return;
487           }
488         }
489       } while (retryCounter.shouldRetry());
490     }
491   }
492 
493   /**
494    * To repair region consistency, one must call connect() in order to repair
495    * online state.
496    */
497   public void connect() throws IOException {
498 
499     // Check if another instance of balancer is running
500     hbckOutFd = checkAndMarkRunningHbck();
501     if (hbckOutFd == null) {
502       setRetCode(-1);
503       LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" +
504           " no other instance is running, delete the lock file " +
505           HBCK_LOCK_PATH + " and rerun the tool]");
506       throw new IOException("Duplicate hbck - Abort");
507     }
508 
509     // Make sure to cleanup the lock
510     hbckLockCleanup.set(true);
511 
512     // Add a shutdown hook to this thread, in case user tries to
513     // kill the hbck with a ctrl-c, we want to cleanup the lock so that
514     // it is available for further calls
515     Runtime.getRuntime().addShutdownHook(new Thread() {
516       @Override
517       public void run() {
518         IOUtils.closeStream(HBaseFsck.this);
519         cleanupHbckZnode();
520         unlockHbck();
521       }
522     });
523 
524     LOG.info("Launching hbck");
525 
526     connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
527     admin = connection.getAdmin();
528     meta = connection.getTable(TableName.META_TABLE_NAME);
529     status = admin.getClusterStatus();
530   }
531 
532   /**
533    * Get deployed regions according to the region servers.
534    */
535   private void loadDeployedRegions() throws IOException, InterruptedException {
536     // From the master, get a list of all known live region servers
537     Collection<ServerName> regionServers = status.getServers();
538     errors.print("Number of live region servers: " + regionServers.size());
539     if (details) {
540       for (ServerName rsinfo: regionServers) {
541         errors.print("  " + rsinfo.getServerName());
542       }
543     }
544 
545     // From the master, get a list of all dead region servers
546     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
547     errors.print("Number of dead region servers: " + deadRegionServers.size());
548     if (details) {
549       for (ServerName name: deadRegionServers) {
550         errors.print("  " + name);
551       }
552     }
553 
554     // Print the current master name and state
555     errors.print("Master: " + status.getMaster());
556 
557     // Print the list of all backup masters
558     Collection<ServerName> backupMasters = status.getBackupMasters();
559     errors.print("Number of backup masters: " + backupMasters.size());
560     if (details) {
561       for (ServerName name: backupMasters) {
562         errors.print("  " + name);
563       }
564     }
565 
566     errors.print("Average load: " + status.getAverageLoad());
567     errors.print("Number of requests: " + status.getRequestsCount());
568     errors.print("Number of regions: " + status.getRegionsCount());
569 
570     Map<String, RegionState> rits = status.getRegionsInTransition();
571     errors.print("Number of regions in transition: " + rits.size());
572     if (details) {
573       for (RegionState state: rits.values()) {
574         errors.print("  " + state.toDescriptiveString());
575       }
576     }
577 
578     // Determine what's deployed
579     processRegionServers(regionServers);
580   }
581 
582   /**
583    * Clear the current state of hbck.
584    */
585   private void clearState() {
586     // Make sure regionInfo is empty before starting
587     fixes = 0;
588     regionInfoMap.clear();
589     emptyRegionInfoQualifiers.clear();
590     disabledTables.clear();
591     errors.clear();
592     tablesInfo.clear();
593     orphanHdfsDirs.clear();
594     skippedRegions.clear();
595   }
596 
597   /**
598    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
599    * the table integrity rules.  HBase doesn't need to be online for this
600    * operation to work.
601    */
602   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
603     // Initial pass to fix orphans.
604     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
605         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
606       LOG.info("Loading regioninfos HDFS");
607       // if nothing is happening this should always complete in two iterations.
608       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
609       int curIter = 0;
610       do {
611         clearState(); // clears hbck state and reset fixes to 0 and.
612         // repair what's on HDFS
613         restoreHdfsIntegrity();
614         curIter++;// limit the number of iterations.
615       } while (fixes > 0 && curIter <= maxIterations);
616 
617       // Repairs should be done in the first iteration and verification in the second.
618       // If there are more than 2 passes, something funny has happened.
619       if (curIter > 2) {
620         if (curIter == maxIterations) {
621           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
622               + "Tables integrity may not be fully repaired!");
623         } else {
624           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
625         }
626       }
627     }
628   }
629 
630   /**
631    * This repair method requires the cluster to be online since it contacts
632    * region servers and the masters.  It makes each region's state in HDFS, in
633    * hbase:meta, and deployments consistent.
634    *
635    * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable
636    * error.  If 0, we have a clean hbase.
637    */
638   public int onlineConsistencyRepair() throws IOException, KeeperException,
639     InterruptedException {
640     clearState();
641 
642     // get regions according to what is online on each RegionServer
643     loadDeployedRegions();
644     // check whether hbase:meta is deployed and online
645     recordMetaRegion();
646     // Check if hbase:meta is found only once and in the right place
647     if (!checkMetaRegion()) {
648       String errorMsg = "hbase:meta table is not consistent. ";
649       if (shouldFixAssignments()) {
650         errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
651       } else {
652         errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
653       }
654       errors.reportError(errorMsg + " Exiting...");
655       return -2;
656     }
657     // Not going with further consistency check for tables when hbase:meta itself is not consistent.
658     LOG.info("Loading regionsinfo from the hbase:meta table");
659     boolean success = loadMetaEntries();
660     if (!success) return -1;
661 
662     // Empty cells in hbase:meta?
663     reportEmptyMetaCells();
664 
665     // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
666     if (shouldFixEmptyMetaCells()) {
667       fixEmptyMetaCells();
668     }
669 
670     // get a list of all tables that have not changed recently.
671     if (!checkMetaOnly) {
672       reportTablesInFlux();
673     }
674 
675     // load regiondirs and regioninfos from HDFS
676     if (shouldCheckHdfs()) {
677       LOG.info("Loading region directories from HDFS");
678       loadHdfsRegionDirs();
679       LOG.info("Loading region information from HDFS");
680       loadHdfsRegionInfos();
681     }
682 
683     // Get disabled tables from ZooKeeper
684     loadDisabledTables();
685 
686     // fix the orphan tables
687     fixOrphanTables();
688 
689     LOG.info("Checking and fixing region consistency");
690 
691     // Check and fix consistency
692     checkAndFixConsistency();
693 
694     // Check integrity (does not fix)
695     checkIntegrity();
696     return errors.getErrorList().size();
697   }
698 
699   /**
700    * This method maintains an ephemeral znode. If the creation fails we return false or throw
701    * exception
702    *
703    * @return true if creating znode succeeds; false otherwise
704    * @throws IOException if IO failure occurs
705    */
706   private boolean setMasterInMaintenanceMode() throws IOException {
707     RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
708     hbckEphemeralNodePath = ZKUtil.joinZNode(
709       ZooKeeperWatcher.masterMaintZNode,
710       "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
711     do {
712       try {
713         hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
714         if (hbckZodeCreated) {
715           break;
716         }
717       } catch (KeeperException e) {
718         if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
719            throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
720         }
721         // fall through and retry
722       }
723 
724       LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
725           (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
726 
727       try {
728         retryCounter.sleepUntilNextRetry();
729       } catch (InterruptedException ie) {
730         throw (InterruptedIOException) new InterruptedIOException(
731               "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
732       }
733     } while (retryCounter.shouldRetry());
734     return hbckZodeCreated;
735   }
736 
737   private void cleanupHbckZnode() {
738     try {
739       if (zkw != null && hbckZodeCreated) {
740         ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
741         hbckZodeCreated = false;
742       }
743     } catch (KeeperException e) {
744       // Ignore
745       if (!e.code().equals(KeeperException.Code.NONODE)) {
746         LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
747       }
748     }
749   }
750 
751   /**
752    * Contacts the master and prints out cluster-wide information
753    * @return 0 on success, non-zero on failure
754    */
755   public int onlineHbck()
756       throws IOException, KeeperException, InterruptedException, ServiceException {
757     // print hbase server version
758     errors.print("Version: " + status.getHBaseVersion());
759     offlineHdfsIntegrityRepair();
760 
761     // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
762     // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
763     // is better to set Master into maintenance mode during online hbck.
764     //
765     if (!setMasterInMaintenanceMode()) {
766       LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
767         + "error.  Please run HBCK multiple times to reduce the chance of transient error.");
768     }
769 
770     onlineConsistencyRepair();
771 
772     if (checkRegionBoundaries) {
773       checkRegionBoundaries();
774     }
775 
776     offlineReferenceFileRepair();
777     offlineHLinkFileRepair();
778 
779     checkAndFixTableLocks();
780 
781     // Check (and fix if requested) orphaned table ZNodes
782     checkAndFixOrphanedTableZNodes();
783 
784     // Remove the hbck znode
785     cleanupHbckZnode();
786 
787     // Remove the hbck lock
788     unlockHbck();
789 
790     // Print table summary
791     printTableSummary(tablesInfo);
792     return errors.summarize();
793   }
794 
795   public static byte[] keyOnly (byte[] b) {
796     if (b == null)
797       return b;
798     int rowlength = Bytes.toShort(b, 0);
799     byte[] result = new byte[rowlength];
800     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
801     return result;
802   }
803 
804   @Override
805   public void close() throws IOException {
806     try {
807       cleanupHbckZnode();
808       unlockHbck();
809     } catch (Exception io) {
810       LOG.warn(io);
811     } finally {
812       if (zkw != null) {
813         zkw.close();
814         zkw = null;
815       }
816       IOUtils.cleanup(null, admin, meta, connection);
817     }
818   }
819 
820   private static class RegionBoundariesInformation {
821     public byte [] regionName;
822     public byte [] metaFirstKey;
823     public byte [] metaLastKey;
824     public byte [] storesFirstKey;
825     public byte [] storesLastKey;
826     @Override
827     public String toString () {
828       return "regionName=" + Bytes.toStringBinary(regionName) +
829              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
830              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
831              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
832              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
833     }
834   }
835 
836   public void checkRegionBoundaries() {
837     try {
838       ByteArrayComparator comparator = new ByteArrayComparator();
839       List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), connection, false);
840       final RegionBoundariesInformation currentRegionBoundariesInformation =
841           new RegionBoundariesInformation();
842       Path hbaseRoot = FSUtils.getRootDir(getConf());
843       for (HRegionInfo regionInfo : regions) {
844         Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
845         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
846         // For each region, get the start and stop key from the META and compare them to the
847         // same information from the Stores.
848         Path path = new Path(tableDir, regionInfo.getEncodedName());
849         FileSystem fs = path.getFileSystem(getConf());
850         FileStatus[] files = fs.listStatus(path);
851         // For all the column families in this region...
852         byte[] storeFirstKey = null;
853         byte[] storeLastKey = null;
854         for (FileStatus file : files) {
855           String fileName = file.getPath().toString();
856           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
857           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
858             FileStatus[] storeFiles = fs.listStatus(file.getPath());
859             // For all the stores in this column family.
860             for (FileStatus storeFile : storeFiles) {
861               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
862                   getConf()), getConf());
863               if ((reader.getFirstKey() != null)
864                   && ((storeFirstKey == null) || (comparator.compare(storeFirstKey,
865                       reader.getFirstKey()) > 0))) {
866                 storeFirstKey = reader.getFirstKey();
867               }
868               if ((reader.getLastKey() != null)
869                   && ((storeLastKey == null) || (comparator.compare(storeLastKey,
870                       reader.getLastKey())) < 0)) {
871                 storeLastKey = reader.getLastKey();
872               }
873               reader.close();
874             }
875           }
876         }
877         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
878         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
879         currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey);
880         currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey);
881         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
882           currentRegionBoundariesInformation.metaFirstKey = null;
883         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
884           currentRegionBoundariesInformation.metaLastKey = null;
885 
886         // For a region to be correct, we need the META start key to be smaller or equal to the
887         // smallest start key from all the stores, and the start key from the next META entry to
888         // be bigger than the last key from all the current stores. First region start key is null;
889         // Last region end key is null; some regions can be empty and not have any store.
890 
891         boolean valid = true;
892         // Checking start key.
893         if ((currentRegionBoundariesInformation.storesFirstKey != null)
894             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
895           valid = valid
896               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
897                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
898         }
899         // Checking stop key.
900         if ((currentRegionBoundariesInformation.storesLastKey != null)
901             && (currentRegionBoundariesInformation.metaLastKey != null)) {
902           valid = valid
903               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
904                 currentRegionBoundariesInformation.metaLastKey) < 0;
905         }
906         if (!valid) {
907           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
908             tablesInfo.get(regionInfo.getTable()));
909           LOG.warn("Region's boundaries not alligned between stores and META for:");
910           LOG.warn(currentRegionBoundariesInformation);
911         }
912       }
913     } catch (IOException e) {
914       LOG.error(e);
915     }
916   }
917 
918   /**
919    * Iterates through the list of all orphan/invalid regiondirs.
920    */
921   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
922     for (HbckInfo hi : orphanHdfsDirs) {
923       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
924       adoptHdfsOrphan(hi);
925     }
926   }
927 
928   /**
929    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
930    * these orphans by creating a new region, and moving the column families,
931    * recovered edits, WALs, into the new region dir.  We determine the region
932    * startkey and endkeys by looking at all of the hfiles inside the column
933    * families to identify the min and max keys. The resulting region will
934    * likely violate table integrity but will be dealt with by merging
935    * overlapping regions.
936    */
937   @SuppressWarnings("deprecation")
938   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
939     Path p = hi.getHdfsRegionDir();
940     FileSystem fs = p.getFileSystem(getConf());
941     FileStatus[] dirs = fs.listStatus(p);
942     if (dirs == null) {
943       LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " +
944           p + ". This dir could probably be deleted.");
945       return ;
946     }
947 
948     TableName tableName = hi.getTableName();
949     TableInfo tableInfo = tablesInfo.get(tableName);
950     Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
951     HTableDescriptor template = tableInfo.getHTD();
952 
953     // find min and max key values
954     Pair<byte[],byte[]> orphanRegionRange = null;
955     for (FileStatus cf : dirs) {
956       String cfName= cf.getPath().getName();
957       // TODO Figure out what the special dirs are
958       if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
959 
960       FileStatus[] hfiles = fs.listStatus(cf.getPath());
961       for (FileStatus hfile : hfiles) {
962         byte[] start, end;
963         HFile.Reader hf = null;
964         try {
965           CacheConfig cacheConf = new CacheConfig(getConf());
966           hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
967           hf.loadFileInfo();
968           KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
969           start = startKv.getRow();
970           KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
971           end = endKv.getRow();
972         } catch (IOException ioe) {
973           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
974           continue;
975         } catch (NullPointerException ioe) {
976           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
977           continue;
978         } finally {
979           if (hf != null) {
980             hf.close();
981           }
982         }
983 
984         // expand the range to include the range of all hfiles
985         if (orphanRegionRange == null) {
986           // first range
987           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
988         } else {
989           // TODO add test
990 
991           // expand range only if the hfile is wider.
992           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
993             orphanRegionRange.setFirst(start);
994           }
995           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
996             orphanRegionRange.setSecond(end);
997           }
998         }
999       }
1000     }
1001     if (orphanRegionRange == null) {
1002       LOG.warn("No data in dir " + p + ", sidelining data");
1003       fixes++;
1004       sidelineRegionDir(fs, hi);
1005       return;
1006     }
1007     LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1008         Bytes.toString(orphanRegionRange.getSecond()) + ")");
1009 
1010     // create new region on hdfs.  move data into place.
1011     HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(),
1012       Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
1013     LOG.info("Creating new region : " + hri);
1014     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
1015     Path target = region.getRegionFileSystem().getRegionDir();
1016 
1017     // rename all the data to new region
1018     mergeRegionDirs(target, hi);
1019     fixes++;
1020   }
1021 
1022   /**
1023    * This method determines if there are table integrity errors in HDFS.  If
1024    * there are errors and the appropriate "fix" options are enabled, the method
1025    * will first correct orphan regions making them into legit regiondirs, and
1026    * then reload to merge potentially overlapping regions.
1027    *
1028    * @return number of table integrity errors found
1029    */
1030   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1031     // Determine what's on HDFS
1032     LOG.info("Loading HBase regioninfo from HDFS...");
1033     loadHdfsRegionDirs(); // populating regioninfo table.
1034 
1035     int errs = errors.getErrorList().size();
1036     // First time just get suggestions.
1037     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1038     checkHdfsIntegrity(false, false);
1039 
1040     if (errors.getErrorList().size() == errs) {
1041       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1042       return 0;
1043     }
1044 
1045     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1046       adoptHdfsOrphans(orphanHdfsDirs);
1047       // TODO optimize by incrementally adding instead of reloading.
1048     }
1049 
1050     // Make sure there are no holes now.
1051     if (shouldFixHdfsHoles()) {
1052       clearState(); // this also resets # fixes.
1053       loadHdfsRegionDirs();
1054       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1055       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1056     }
1057 
1058     // Now we fix overlaps
1059     if (shouldFixHdfsOverlaps()) {
1060       // second pass we fix overlaps.
1061       clearState(); // this also resets # fixes.
1062       loadHdfsRegionDirs();
1063       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1064       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1065     }
1066 
1067     return errors.getErrorList().size();
1068   }
1069 
1070   /**
1071    * Scan all the store file names to find any lingering reference files,
1072    * which refer to some none-exiting files. If "fix" option is enabled,
1073    * any lingering reference file will be sidelined if found.
1074    * <p>
1075    * Lingering reference file prevents a region from opening. It has to
1076    * be fixed before a cluster can start properly.
1077    */
1078   private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1079     Configuration conf = getConf();
1080     Path hbaseRoot = FSUtils.getRootDir(conf);
1081     FileSystem fs = hbaseRoot.getFileSystem(conf);
1082     LOG.info("Computing mapping of all store files");
1083     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1084       new FSUtils.ReferenceFileFilter(fs), executor, errors);
1085     errors.print("");
1086     LOG.info("Validating mapping using HDFS state");
1087     for (Path path: allFiles.values()) {
1088       Path referredToFile = StoreFileInfo.getReferredToFile(path);
1089       if (fs.exists(referredToFile)) continue;  // good, expected
1090 
1091       // Found a lingering reference file
1092       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1093         "Found lingering reference file " + path);
1094       if (!shouldFixReferenceFiles()) continue;
1095 
1096       // Now, trying to fix it since requested
1097       boolean success = false;
1098       String pathStr = path.toString();
1099 
1100       // A reference file path should be like
1101       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1102       // Up 5 directories to get the root folder.
1103       // So the file will be sidelined to a similar folder structure.
1104       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1105       for (int i = 0; index > 0 && i < 5; i++) {
1106         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1107       }
1108       if (index > 0) {
1109         Path rootDir = getSidelineDir();
1110         Path dst = new Path(rootDir, pathStr.substring(index + 1));
1111         fs.mkdirs(dst.getParent());
1112         LOG.info("Trying to sildeline reference file "
1113           + path + " to " + dst);
1114         setShouldRerun();
1115 
1116         success = fs.rename(path, dst);
1117       }
1118       if (!success) {
1119         LOG.error("Failed to sideline reference file " + path);
1120       }
1121     }
1122   }
1123 
1124   /**
1125    * Scan all the store file names to find any lingering HFileLink files,
1126    * which refer to some none-exiting files. If "fix" option is enabled,
1127    * any lingering HFileLink file will be sidelined if found.
1128    */
1129   private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1130     Configuration conf = getConf();
1131     Path hbaseRoot = FSUtils.getRootDir(conf);
1132     FileSystem fs = hbaseRoot.getFileSystem(conf);
1133     LOG.info("Computing mapping of all link files");
1134     Map<String, Path> allFiles = FSUtils
1135         .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1136     errors.print("");
1137 
1138     LOG.info("Validating mapping using HDFS state");
1139     for (Path path : allFiles.values()) {
1140       // building HFileLink object to gather locations
1141       HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1142       if (actualLink.exists(fs)) continue; // good, expected
1143 
1144       // Found a lingering HFileLink
1145       errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1146       if (!shouldFixHFileLinks()) continue;
1147 
1148       // Now, trying to fix it since requested
1149       setShouldRerun();
1150 
1151       // An HFileLink path should be like
1152       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
1153       // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1154       boolean success = sidelineFile(fs, hbaseRoot, path);
1155 
1156       if (!success) {
1157         LOG.error("Failed to sideline HFileLink file " + path);
1158       }
1159 
1160       // An HFileLink backreference path should be like
1161       // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
1162       // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1163       Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1164               .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1165                   HFileLink.getReferencedRegionName(path.getName().toString()),
1166                   path.getParent().getName()),
1167           HFileLink.getReferencedHFileName(path.getName().toString()));
1168       success = sidelineFile(fs, hbaseRoot, backRefPath);
1169 
1170       if (!success) {
1171         LOG.error("Failed to sideline HFileLink backreference file " + path);
1172       }
1173     }
1174   }
1175 
1176   private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1177     URI uri = hbaseRoot.toUri().relativize(path.toUri());
1178     if (uri.isAbsolute()) return false;
1179     String relativePath = uri.getPath();
1180     Path rootDir = getSidelineDir();
1181     Path dst = new Path(rootDir, relativePath);
1182     boolean pathCreated = fs.mkdirs(dst.getParent());
1183     if (!pathCreated) {
1184       LOG.error("Failed to create path: " + dst.getParent());
1185       return false;
1186     }
1187     LOG.info("Trying to sideline file " + path + " to " + dst);
1188     return fs.rename(path, dst);
1189   }
1190 
1191   /**
1192    * TODO -- need to add tests for this.
1193    */
1194   private void reportEmptyMetaCells() {
1195     errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1196       emptyRegionInfoQualifiers.size());
1197     if (details) {
1198       for (Result r: emptyRegionInfoQualifiers) {
1199         errors.print("  " + r);
1200       }
1201     }
1202   }
1203 
1204   /**
1205    * TODO -- need to add tests for this.
1206    */
1207   private void reportTablesInFlux() {
1208     AtomicInteger numSkipped = new AtomicInteger(0);
1209     HTableDescriptor[] allTables = getTables(numSkipped);
1210     errors.print("Number of Tables: " + allTables.length);
1211     if (details) {
1212       if (numSkipped.get() > 0) {
1213         errors.detail("Number of Tables in flux: " + numSkipped.get());
1214       }
1215       for (HTableDescriptor td : allTables) {
1216         errors.detail("  Table: " + td.getTableName() + "\t" +
1217                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
1218                             (td.isMetaRegion() ? "META" : "    ") + "\t" +
1219                            " families: " + td.getFamilies().size());
1220       }
1221     }
1222   }
1223 
1224   public ErrorReporter getErrors() {
1225     return errors;
1226   }
1227 
1228   /**
1229    * Read the .regioninfo file from the file system.  If there is no
1230    * .regioninfo, add it to the orphan hdfs region list.
1231    */
1232   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1233     Path regionDir = hbi.getHdfsRegionDir();
1234     if (regionDir == null) {
1235       if (hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1236         // Log warning only for default/ primary replica with no region dir
1237         LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1238       }
1239       return;
1240     }
1241 
1242     if (hbi.hdfsEntry.hri != null) {
1243       // already loaded data
1244       return;
1245     }
1246 
1247     FileSystem fs = FileSystem.get(getConf());
1248     HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1249     LOG.debug("HRegionInfo read: " + hri.toString());
1250     hbi.hdfsEntry.hri = hri;
1251   }
1252 
1253   /**
1254    * Exception thrown when a integrity repair operation fails in an
1255    * unresolvable way.
1256    */
1257   public static class RegionRepairException extends IOException {
1258     private static final long serialVersionUID = 1L;
1259     final IOException ioe;
1260     public RegionRepairException(String s, IOException ioe) {
1261       super(s);
1262       this.ioe = ioe;
1263     }
1264   }
1265 
1266   /**
1267    * Populate hbi's from regionInfos loaded from file system.
1268    */
1269   private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1270       throws IOException, InterruptedException {
1271     tablesInfo.clear(); // regenerating the data
1272     // generate region split structure
1273     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1274 
1275     // Parallelized read of .regioninfo files.
1276     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1277     List<Future<Void>> hbiFutures;
1278 
1279     for (HbckInfo hbi : hbckInfos) {
1280       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1281       hbis.add(work);
1282     }
1283 
1284     // Submit and wait for completion
1285     hbiFutures = executor.invokeAll(hbis);
1286 
1287     for(int i=0; i<hbiFutures.size(); i++) {
1288       WorkItemHdfsRegionInfo work = hbis.get(i);
1289       Future<Void> f = hbiFutures.get(i);
1290       try {
1291         f.get();
1292       } catch(ExecutionException e) {
1293         LOG.warn("Failed to read .regioninfo file for region " +
1294               work.hbi.getRegionNameAsString(), e.getCause());
1295       }
1296     }
1297 
1298     Path hbaseRoot = FSUtils.getRootDir(getConf());
1299     FileSystem fs = hbaseRoot.getFileSystem(getConf());
1300     // serialized table info gathering.
1301     for (HbckInfo hbi: hbckInfos) {
1302 
1303       if (hbi.getHdfsHRI() == null) {
1304         // was an orphan
1305         continue;
1306       }
1307 
1308 
1309       // get table name from hdfs, populate various HBaseFsck tables.
1310       TableName tableName = hbi.getTableName();
1311       if (tableName == null) {
1312         // There was an entry in hbase:meta not in the HDFS?
1313         LOG.warn("tableName was null for: " + hbi);
1314         continue;
1315       }
1316 
1317       TableInfo modTInfo = tablesInfo.get(tableName);
1318       if (modTInfo == null) {
1319         // only executed once per table.
1320         modTInfo = new TableInfo(tableName);
1321         tablesInfo.put(tableName, modTInfo);
1322         try {
1323           HTableDescriptor htd =
1324               FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1325           modTInfo.htds.add(htd);
1326         } catch (IOException ioe) {
1327           if (!orphanTableDirs.containsKey(tableName)) {
1328             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1329             //should only report once for each table
1330             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1331                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1332             Set<String> columns = new HashSet<String>();
1333             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1334           }
1335         }
1336       }
1337       if (!hbi.isSkipChecks()) {
1338         modTInfo.addRegionInfo(hbi);
1339       }
1340     }
1341 
1342     loadTableInfosForTablesWithNoRegion();
1343     errors.print("");
1344 
1345     return tablesInfo;
1346   }
1347 
1348   /**
1349    * To get the column family list according to the column family dirs
1350    * @param columns
1351    * @param hbi
1352    * @return a set of column families
1353    * @throws IOException
1354    */
1355   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1356     Path regionDir = hbi.getHdfsRegionDir();
1357     FileSystem fs = regionDir.getFileSystem(getConf());
1358     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1359     for (FileStatus subdir : subDirs) {
1360       String columnfamily = subdir.getPath().getName();
1361       columns.add(columnfamily);
1362     }
1363     return columns;
1364   }
1365 
1366   /**
1367    * To fabricate a .tableinfo file with following contents<br>
1368    * 1. the correct tablename <br>
1369    * 2. the correct colfamily list<br>
1370    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1371    * @throws IOException
1372    */
1373   private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1374       Set<String> columns) throws IOException {
1375     if (columns ==null || columns.isEmpty()) return false;
1376     HTableDescriptor htd = new HTableDescriptor(tableName);
1377     for (String columnfamimly : columns) {
1378       htd.addFamily(new HColumnDescriptor(columnfamimly));
1379     }
1380     fstd.createTableDescriptor(htd, true);
1381     return true;
1382   }
1383 
1384   /**
1385    * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1386    * @throws IOException
1387    */
1388   public void fixEmptyMetaCells() throws IOException {
1389     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1390       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1391       for (Result region : emptyRegionInfoQualifiers) {
1392         deleteMetaRegion(region.getRow());
1393         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1394       }
1395       emptyRegionInfoQualifiers.clear();
1396     }
1397   }
1398 
1399   /**
1400    * To fix orphan table by creating a .tableinfo file under tableDir <br>
1401    * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1402    * 2. else create a default .tableinfo file with following items<br>
1403    * &nbsp;2.1 the correct tablename <br>
1404    * &nbsp;2.2 the correct colfamily list<br>
1405    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1406    * @throws IOException
1407    */
1408   public void fixOrphanTables() throws IOException {
1409     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1410 
1411       List<TableName> tmpList = new ArrayList<TableName>();
1412       tmpList.addAll(orphanTableDirs.keySet());
1413       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1414       Iterator<Entry<TableName, Set<String>>> iter =
1415           orphanTableDirs.entrySet().iterator();
1416       int j = 0;
1417       int numFailedCase = 0;
1418       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1419       while (iter.hasNext()) {
1420         Entry<TableName, Set<String>> entry =
1421             iter.next();
1422         TableName tableName = entry.getKey();
1423         LOG.info("Trying to fix orphan table error: " + tableName);
1424         if (j < htds.length) {
1425           if (tableName.equals(htds[j].getTableName())) {
1426             HTableDescriptor htd = htds[j];
1427             LOG.info("fixing orphan table: " + tableName + " from cache");
1428             fstd.createTableDescriptor(htd, true);
1429             j++;
1430             iter.remove();
1431           }
1432         } else {
1433           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1434             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1435             LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1436             iter.remove();
1437           } else {
1438             LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1439             numFailedCase++;
1440           }
1441         }
1442         fixes++;
1443       }
1444 
1445       if (orphanTableDirs.isEmpty()) {
1446         // all orphanTableDirs are luckily recovered
1447         // re-run doFsck after recovering the .tableinfo file
1448         setShouldRerun();
1449         LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1450       } else if (numFailedCase > 0) {
1451         LOG.error("Failed to fix " + numFailedCase
1452             + " OrphanTables with default .tableinfo files");
1453       }
1454 
1455     }
1456     //cleanup the list
1457     orphanTableDirs.clear();
1458 
1459   }
1460 
1461   /**
1462    * This borrows code from MasterFileSystem.bootstrap()
1463    *
1464    * @return an open hbase:meta HRegion
1465    */
1466   private HRegion createNewMeta() throws IOException {
1467       Path rootdir = FSUtils.getRootDir(getConf());
1468     Configuration c = getConf();
1469     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1470     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1471     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1472     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
1473     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1474     return meta;
1475   }
1476 
1477   /**
1478    * Generate set of puts to add to new meta.  This expects the tables to be
1479    * clean with no overlaps or holes.  If there are any problems it returns null.
1480    *
1481    * @return An array list of puts to do in bulk, null if tables have problems
1482    */
1483   private ArrayList<Put> generatePuts(
1484       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1485     ArrayList<Put> puts = new ArrayList<Put>();
1486     boolean hasProblems = false;
1487     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1488       TableName name = e.getKey();
1489 
1490       // skip "hbase:meta"
1491       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1492         continue;
1493       }
1494 
1495       TableInfo ti = e.getValue();
1496       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1497           .entrySet()) {
1498         Collection<HbckInfo> his = spl.getValue();
1499         int sz = his.size();
1500         if (sz != 1) {
1501           // problem
1502           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1503               + " had " +  sz + " regions instead of exactly 1." );
1504           hasProblems = true;
1505           continue;
1506         }
1507 
1508         // add the row directly to meta.
1509         HbckInfo hi = his.iterator().next();
1510         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1511         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1512         puts.add(p);
1513       }
1514     }
1515     return hasProblems ? null : puts;
1516   }
1517 
1518   /**
1519    * Suggest fixes for each table
1520    */
1521   private void suggestFixes(
1522       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1523     logParallelMerge();
1524     for (TableInfo tInfo : tablesInfo.values()) {
1525       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1526       tInfo.checkRegionChain(handler);
1527     }
1528   }
1529 
1530   /**
1531    * Rebuilds meta from information in hdfs/fs.  Depends on configuration
1532    * settings passed into hbck constructor to point to a particular fs/dir.
1533    *
1534    * @param fix flag that determines if method should attempt to fix holes
1535    * @return true if successful, false if attempt failed.
1536    */
1537   public boolean rebuildMeta(boolean fix) throws IOException,
1538       InterruptedException {
1539 
1540     // TODO check to make sure hbase is offline. (or at least the table
1541     // currently being worked on is off line)
1542 
1543     // Determine what's on HDFS
1544     LOG.info("Loading HBase regioninfo from HDFS...");
1545     loadHdfsRegionDirs(); // populating regioninfo table.
1546 
1547     int errs = errors.getErrorList().size();
1548     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1549     checkHdfsIntegrity(false, false);
1550 
1551     // make sure ok.
1552     if (errors.getErrorList().size() != errs) {
1553       // While in error state, iterate until no more fixes possible
1554       while(true) {
1555         fixes = 0;
1556         suggestFixes(tablesInfo);
1557         errors.clear();
1558         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1559         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1560 
1561         int errCount = errors.getErrorList().size();
1562 
1563         if (fixes == 0) {
1564           if (errCount > 0) {
1565             return false; // failed to fix problems.
1566           } else {
1567             break; // no fixes and no problems? drop out and fix stuff!
1568           }
1569         }
1570       }
1571     }
1572 
1573     // we can rebuild, move old meta out of the way and start
1574     LOG.info("HDFS regioninfo's seems good.  Sidelining old hbase:meta");
1575     Path backupDir = sidelineOldMeta();
1576 
1577     LOG.info("Creating new hbase:meta");
1578     HRegion meta = createNewMeta();
1579 
1580     // populate meta
1581     List<Put> puts = generatePuts(tablesInfo);
1582     if (puts == null) {
1583       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1584         "You may need to restore the previously sidelined hbase:meta");
1585       return false;
1586     }
1587     meta.batchMutate(puts.toArray(new Put[puts.size()]));
1588     HRegion.closeHRegion(meta);
1589     LOG.info("Success! hbase:meta table rebuilt.");
1590     LOG.info("Old hbase:meta is moved into " + backupDir);
1591     return true;
1592   }
1593 
1594   /**
1595    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1596    */
1597   private void logParallelMerge() {
1598     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1599       LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1600           " false to run serially.");
1601     } else {
1602       LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
1603           " true to run in parallel.");
1604     }
1605   }
1606 
1607   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1608       boolean fixOverlaps) throws IOException {
1609     LOG.info("Checking HBase region split map from HDFS data...");
1610     logParallelMerge();
1611     for (TableInfo tInfo : tablesInfo.values()) {
1612       TableIntegrityErrorHandler handler;
1613       if (fixHoles || fixOverlaps) {
1614         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1615           fixHoles, fixOverlaps);
1616       } else {
1617         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1618       }
1619       if (!tInfo.checkRegionChain(handler)) {
1620         // should dump info as well.
1621         errors.report("Found inconsistency in table " + tInfo.getName());
1622       }
1623     }
1624     return tablesInfo;
1625   }
1626 
1627   private Path getSidelineDir() throws IOException {
1628     if (sidelineDir == null) {
1629       Path hbaseDir = FSUtils.getRootDir(getConf());
1630       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1631       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1632           + startMillis);
1633     }
1634     return sidelineDir;
1635   }
1636 
1637   /**
1638    * Sideline a region dir (instead of deleting it)
1639    */
1640   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1641     return sidelineRegionDir(fs, null, hi);
1642   }
1643 
1644   /**
1645    * Sideline a region dir (instead of deleting it)
1646    *
1647    * @param parentDir if specified, the region will be sidelined to
1648    * folder like .../parentDir/<table name>/<region name>. The purpose
1649    * is to group together similar regions sidelined, for example, those
1650    * regions should be bulk loaded back later on. If null, it is ignored.
1651    */
1652   Path sidelineRegionDir(FileSystem fs,
1653       String parentDir, HbckInfo hi) throws IOException {
1654     TableName tableName = hi.getTableName();
1655     Path regionDir = hi.getHdfsRegionDir();
1656 
1657     if (!fs.exists(regionDir)) {
1658       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1659       return null;
1660     }
1661 
1662     Path rootDir = getSidelineDir();
1663     if (parentDir != null) {
1664       rootDir = new Path(rootDir, parentDir);
1665     }
1666     Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1667     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1668     fs.mkdirs(sidelineRegionDir);
1669     boolean success = false;
1670     FileStatus[] cfs =  fs.listStatus(regionDir);
1671     if (cfs == null) {
1672       LOG.info("Region dir is empty: " + regionDir);
1673     } else {
1674       for (FileStatus cf : cfs) {
1675         Path src = cf.getPath();
1676         Path dst =  new Path(sidelineRegionDir, src.getName());
1677         if (fs.isFile(src)) {
1678           // simple file
1679           success = fs.rename(src, dst);
1680           if (!success) {
1681             String msg = "Unable to rename file " + src +  " to " + dst;
1682             LOG.error(msg);
1683             throw new IOException(msg);
1684           }
1685           continue;
1686         }
1687 
1688         // is a directory.
1689         fs.mkdirs(dst);
1690 
1691         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1692         // FileSystem.rename is inconsistent with directories -- if the
1693         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1694         // it moves the src into the dst dir resulting in (foo/a/b).  If
1695         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1696         FileStatus[] hfiles = fs.listStatus(src);
1697         if (hfiles != null && hfiles.length > 0) {
1698           for (FileStatus hfile : hfiles) {
1699             success = fs.rename(hfile.getPath(), dst);
1700             if (!success) {
1701               String msg = "Unable to rename file " + src +  " to " + dst;
1702               LOG.error(msg);
1703               throw new IOException(msg);
1704             }
1705           }
1706         }
1707         LOG.debug("Sideline directory contents:");
1708         debugLsr(sidelineRegionDir);
1709       }
1710     }
1711 
1712     LOG.info("Removing old region dir: " + regionDir);
1713     success = fs.delete(regionDir, true);
1714     if (!success) {
1715       String msg = "Unable to delete dir " + regionDir;
1716       LOG.error(msg);
1717       throw new IOException(msg);
1718     }
1719     return sidelineRegionDir;
1720   }
1721 
1722   /**
1723    * Side line an entire table.
1724    */
1725   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1726       Path backupHbaseDir) throws IOException {
1727     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1728     if (fs.exists(tableDir)) {
1729       Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1730       fs.mkdirs(backupTableDir.getParent());
1731       boolean success = fs.rename(tableDir, backupTableDir);
1732       if (!success) {
1733         throw new IOException("Failed to move  " + tableName + " from "
1734             +  tableDir + " to " + backupTableDir);
1735       }
1736     } else {
1737       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1738     }
1739   }
1740 
1741   /**
1742    * @return Path to backup of original directory
1743    */
1744   Path sidelineOldMeta() throws IOException {
1745     // put current hbase:meta aside.
1746     Path hbaseDir = FSUtils.getRootDir(getConf());
1747     FileSystem fs = hbaseDir.getFileSystem(getConf());
1748     Path backupDir = getSidelineDir();
1749     fs.mkdirs(backupDir);
1750 
1751     try {
1752       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1753     } catch (IOException e) {
1754         LOG.fatal("... failed to sideline meta. Currently in inconsistent state.  To restore "
1755             + "try to rename hbase:meta in " + backupDir.getName() + " to "
1756             + hbaseDir.getName() + ".", e);
1757       throw e; // throw original exception
1758     }
1759     return backupDir;
1760   }
1761 
1762   /**
1763    * Load the list of disabled tables in ZK into local set.
1764    * @throws ZooKeeperConnectionException
1765    * @throws IOException
1766    */
1767   private void loadDisabledTables()
1768   throws ZooKeeperConnectionException, IOException {
1769     HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1770       @Override
1771       public Void connect(HConnection connection) throws IOException {
1772         try {
1773           for (TableName tableName :
1774               ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) {
1775             disabledTables.add(tableName);
1776           }
1777         } catch (KeeperException ke) {
1778           throw new IOException(ke);
1779         } catch (InterruptedException e) {
1780           throw new InterruptedIOException();
1781         }
1782         return null;
1783       }
1784     });
1785   }
1786 
1787   /**
1788    * Check if the specified region's table is disabled.
1789    */
1790   private boolean isTableDisabled(HRegionInfo regionInfo) {
1791     return disabledTables.contains(regionInfo.getTable());
1792   }
1793 
1794   /**
1795    * Scan HDFS for all regions, recording their information into
1796    * regionInfoMap
1797    */
1798   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1799     Path rootDir = FSUtils.getRootDir(getConf());
1800     FileSystem fs = rootDir.getFileSystem(getConf());
1801 
1802     // list all tables from HDFS
1803     List<FileStatus> tableDirs = Lists.newArrayList();
1804 
1805     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1806 
1807     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1808     for (Path path : paths) {
1809       TableName tableName = FSUtils.getTableName(path);
1810        if ((!checkMetaOnly &&
1811            isTableIncluded(tableName)) ||
1812            tableName.equals(TableName.META_TABLE_NAME)) {
1813          tableDirs.add(fs.getFileStatus(path));
1814        }
1815     }
1816 
1817     // verify that version file exists
1818     if (!foundVersionFile) {
1819       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1820           "Version file does not exist in root dir " + rootDir);
1821       if (shouldFixVersionFile()) {
1822         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1823             + " file.");
1824         setShouldRerun();
1825         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1826             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1827             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1828             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1829       }
1830     }
1831 
1832     // Avoid multithreading at table-level because already multithreaded internally at
1833     // region-level.  Additionally multithreading at table-level can lead to deadlock
1834     // if there are many tables in the cluster.  Since there are a limited # of threads
1835     // in the executor's thread pool and if we multithread at the table-level by putting
1836     // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1837     // executor tied up solely in waiting for the tables' region-level calls to complete.
1838     // If there are enough tables then there will be no actual threads in the pool left
1839     // for the region-level callables to be serviced.
1840     for (FileStatus tableDir : tableDirs) {
1841       LOG.debug("Loading region dirs from " +tableDir.getPath());
1842       WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1843       try {
1844         item.call();
1845       } catch (ExecutionException e) {
1846         LOG.warn("Could not completely load table dir " +
1847             tableDir.getPath(), e.getCause());
1848       }
1849     }
1850     errors.print("");
1851   }
1852 
1853   /**
1854    * Record the location of the hbase:meta region as found in ZooKeeper.
1855    */
1856   private boolean recordMetaRegion() throws IOException {
1857     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1858         HConstants.EMPTY_START_ROW, false, false);
1859     if (rl == null) {
1860       errors.reportError(ERROR_CODE.NULL_META_REGION,
1861           "META region or some of its attributes are null.");
1862       return false;
1863     }
1864     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1865       // Check if Meta region is valid and existing
1866       if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1867           metaLocation.getHostname() == null) {
1868         errors.reportError(ERROR_CODE.NULL_META_REGION,
1869             "META region or some of its attributes are null.");
1870         return false;
1871       }
1872       ServerName sn = metaLocation.getServerName();
1873       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1874       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1875       if (hbckInfo == null) {
1876         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1877       } else {
1878         hbckInfo.metaEntry = m;
1879       }
1880     }
1881     return true;
1882   }
1883 
1884   private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
1885     return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
1886       @Override
1887       public void abort(String why, Throwable e) {
1888         LOG.error(why, e);
1889         System.exit(1);
1890       }
1891 
1892       @Override
1893       public boolean isAborted() {
1894         return false;
1895       }
1896 
1897     });
1898   }
1899 
1900 
1901   /**
1902    * Contacts each regionserver and fetches metadata about regions.
1903    * @param regionServerList - the list of region servers to connect to
1904    * @throws IOException if a remote or network exception occurs
1905    */
1906   void processRegionServers(Collection<ServerName> regionServerList)
1907     throws IOException, InterruptedException {
1908 
1909     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1910     List<Future<Void>> workFutures;
1911 
1912     // loop to contact each region server in parallel
1913     for (ServerName rsinfo: regionServerList) {
1914       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1915     }
1916 
1917     workFutures = executor.invokeAll(workItems);
1918 
1919     for(int i=0; i<workFutures.size(); i++) {
1920       WorkItemRegion item = workItems.get(i);
1921       Future<Void> f = workFutures.get(i);
1922       try {
1923         f.get();
1924       } catch(ExecutionException e) {
1925         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1926             e.getCause());
1927       }
1928     }
1929   }
1930 
1931   /**
1932    * Check consistency of all regions that have been found in previous phases.
1933    */
1934   private void checkAndFixConsistency()
1935   throws IOException, KeeperException, InterruptedException {
1936 	  // Divide the checks in two phases. One for default/primary replicas and another
1937 	  // for the non-primary ones. Keeps code cleaner this way.
1938     List<CheckRegionConsistencyWorkItem> workItems =
1939         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1940     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1941       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1942         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1943       }
1944     }
1945     checkRegionConsistencyConcurrently(workItems);
1946 
1947     boolean prevHdfsCheck = shouldCheckHdfs();
1948     setCheckHdfs(false); //replicas don't have any hdfs data
1949     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1950     // deployed/undeployed replicas.
1951     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1952         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1953     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1954       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1955         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1956       }
1957     }
1958     checkRegionConsistencyConcurrently(replicaWorkItems);
1959     setCheckHdfs(prevHdfsCheck);
1960 
1961     // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might
1962     // not get accurate state of the hbase if continuing. The config here allows users to tune
1963     // the tolerance of number of skipped region.
1964     // TODO: evaluate the consequence to continue the hbck operation without config.
1965     int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
1966     int numOfSkippedRegions = skippedRegions.size();
1967     if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
1968       throw new IOException(numOfSkippedRegions
1969         + " region(s) could not be checked or repaired.  See logs for detail.");
1970     }
1971   }
1972 
1973   /**
1974    * Check consistency of all regions using mulitple threads concurrently.
1975    */
1976   private void checkRegionConsistencyConcurrently(
1977     final List<CheckRegionConsistencyWorkItem> workItems)
1978     throws IOException, KeeperException, InterruptedException {
1979     if (workItems.isEmpty()) {
1980       return;  // nothing to check
1981     }
1982 
1983     List<Future<Void>> workFutures = executor.invokeAll(workItems);
1984     for(Future<Void> f: workFutures) {
1985       try {
1986         f.get();
1987       } catch(ExecutionException e1) {
1988         LOG.warn("Could not check region consistency " , e1.getCause());
1989         if (e1.getCause() instanceof IOException) {
1990           throw (IOException)e1.getCause();
1991         } else if (e1.getCause() instanceof KeeperException) {
1992           throw (KeeperException)e1.getCause();
1993         } else if (e1.getCause() instanceof InterruptedException) {
1994           throw (InterruptedException)e1.getCause();
1995         } else {
1996           throw new IOException(e1.getCause());
1997         }
1998       }
1999     }
2000   }
2001 
2002   class CheckRegionConsistencyWorkItem implements Callable<Void> {
2003     private final String key;
2004     private final HbckInfo hbi;
2005 
2006     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
2007       this.key = key;
2008       this.hbi = hbi;
2009     }
2010 
2011     @Override
2012     public synchronized Void call() throws Exception {
2013       try {
2014         checkRegionConsistency(key, hbi);
2015       } catch (Exception e) {
2016         // If the region is non-META region, skip this region and send warning/error message; if
2017         // the region is META region, we should not continue.
2018         LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
2019           + "'.", e);
2020         if (hbi.getHdfsHRI().isMetaRegion()) {
2021           throw e;
2022         }
2023         LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
2024         addSkippedRegion(hbi);
2025       }
2026       return null;
2027     }
2028   }
2029 
2030   private void addSkippedRegion(final HbckInfo hbi) {
2031     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
2032     if (skippedRegionNames == null) {
2033       skippedRegionNames = new HashSet<String>();
2034     }
2035     skippedRegionNames.add(hbi.getRegionNameAsString());
2036     skippedRegions.put(hbi.getTableName(), skippedRegionNames);
2037   }
2038 
2039   private void preCheckPermission() throws IOException, AccessDeniedException {
2040     if (shouldIgnorePreCheckPermission()) {
2041       return;
2042     }
2043 
2044     Path hbaseDir = FSUtils.getRootDir(getConf());
2045     FileSystem fs = hbaseDir.getFileSystem(getConf());
2046     UserProvider userProvider = UserProvider.instantiate(getConf());
2047     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2048     FileStatus[] files = fs.listStatus(hbaseDir);
2049     for (FileStatus file : files) {
2050       try {
2051         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2052       } catch (AccessDeniedException ace) {
2053         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2054         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2055           + " does not have write perms to " + file.getPath()
2056           + ". Please rerun hbck as hdfs user " + file.getOwner());
2057         throw ace;
2058       }
2059     }
2060   }
2061 
2062   /**
2063    * Deletes region from meta table
2064    */
2065   private void deleteMetaRegion(HbckInfo hi) throws IOException {
2066     deleteMetaRegion(hi.metaEntry.getRegionName());
2067   }
2068 
2069   /**
2070    * Deletes region from meta table
2071    */
2072   private void deleteMetaRegion(byte[] metaKey) throws IOException {
2073     Delete d = new Delete(metaKey);
2074     meta.delete(d);
2075     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2076   }
2077 
2078   /**
2079    * Reset the split parent region info in meta table
2080    */
2081   private void resetSplitParent(HbckInfo hi) throws IOException {
2082     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2083     Delete d = new Delete(hi.metaEntry.getRegionName());
2084     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2085     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2086     mutations.add(d);
2087 
2088     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
2089     hri.setOffline(false);
2090     hri.setSplit(false);
2091     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
2092     mutations.add(p);
2093 
2094     meta.mutateRow(mutations);
2095     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2096   }
2097 
2098   /**
2099    * This backwards-compatibility wrapper for permanently offlining a region
2100    * that should not be alive.  If the region server does not support the
2101    * "offline" method, it will use the closest unassign method instead.  This
2102    * will basically work until one attempts to disable or delete the affected
2103    * table.  The problem has to do with in-memory only master state, so
2104    * restarting the HMaster or failing over to another should fix this.
2105    */
2106   private void offline(byte[] regionName) throws IOException {
2107     String regionString = Bytes.toStringBinary(regionName);
2108     if (!rsSupportsOffline) {
2109       LOG.warn("Using unassign region " + regionString
2110           + " instead of using offline method, you should"
2111           + " restart HMaster after these repairs");
2112       admin.unassign(regionName, true);
2113       return;
2114     }
2115 
2116     // first time we assume the rs's supports #offline.
2117     try {
2118       LOG.info("Offlining region " + regionString);
2119       admin.offline(regionName);
2120     } catch (IOException ioe) {
2121       String notFoundMsg = "java.lang.NoSuchMethodException: " +
2122         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2123       if (ioe.getMessage().contains(notFoundMsg)) {
2124         LOG.warn("Using unassign region " + regionString
2125             + " instead of using offline method, you should"
2126             + " restart HMaster after these repairs");
2127         rsSupportsOffline = false; // in the future just use unassign
2128         admin.unassign(regionName, true);
2129         return;
2130       }
2131       throw ioe;
2132     }
2133   }
2134 
2135   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2136     undeployRegionsForHbi(hi);
2137     // undeploy replicas of the region (but only if the method is invoked for the primary)
2138     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2139       return;
2140     }
2141     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2142     for (int i = 1; i < numReplicas; i++) {
2143       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2144       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2145           hi.getPrimaryHRIForDeployedReplica(), i);
2146       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2147       if (h != null) {
2148         undeployRegionsForHbi(h);
2149         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2150         //in consistency checks
2151         h.setSkipChecks(true);
2152       }
2153     }
2154   }
2155 
2156   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2157     for (OnlineEntry rse : hi.deployedEntries) {
2158       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2159       try {
2160         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2161         offline(rse.hri.getRegionName());
2162       } catch (IOException ioe) {
2163         LOG.warn("Got exception when attempting to offline region "
2164             + Bytes.toString(rse.hri.getRegionName()), ioe);
2165       }
2166     }
2167   }
2168 
2169   /**
2170    * Attempts to undeploy a region from a region server based in information in
2171    * META.  Any operations that modify the file system should make sure that
2172    * its corresponding region is not deployed to prevent data races.
2173    *
2174    * A separate call is required to update the master in-memory region state
2175    * kept in the AssignementManager.  Because disable uses this state instead of
2176    * that found in META, we can't seem to cleanly disable/delete tables that
2177    * have been hbck fixed.  When used on a version of HBase that does not have
2178    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
2179    * restart or failover may be required.
2180    */
2181   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2182     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2183       undeployRegions(hi);
2184       return;
2185     }
2186 
2187     // get assignment info and hregioninfo from meta.
2188     Get get = new Get(hi.getRegionName());
2189     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2190     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2191     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2192     // also get the locations of the replicas to close if the primary region is being closed
2193     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2194       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2195       for (int i = 0; i < numReplicas; i++) {
2196         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2197         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2198       }
2199     }
2200     Result r = meta.get(get);
2201     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2202     if (rl == null) {
2203       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2204           " since meta does not have handle to reach it");
2205       return;
2206     }
2207     for (HRegionLocation h : rl.getRegionLocations()) {
2208       ServerName serverName = h.getServerName();
2209       if (serverName == null) {
2210         errors.reportError("Unable to close region "
2211             + hi.getRegionNameAsString() +  " because meta does not "
2212             + "have handle to reach it.");
2213         continue;
2214       }
2215       HRegionInfo hri = h.getRegionInfo();
2216       if (hri == null) {
2217         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2218             + " because hbase:meta had invalid or missing "
2219             + HConstants.CATALOG_FAMILY_STR + ":"
2220             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2221             + " qualifier value.");
2222         continue;
2223       }
2224       // close the region -- close files and remove assignment
2225       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2226     }
2227   }
2228 
2229   private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
2230     KeeperException, InterruptedException {
2231     // If we are trying to fix the errors
2232     if (shouldFixAssignments()) {
2233       errors.print(msg);
2234       undeployRegions(hbi);
2235       setShouldRerun();
2236       HRegionInfo hri = hbi.getHdfsHRI();
2237       if (hri == null) {
2238         hri = hbi.metaEntry;
2239       }
2240       HBaseFsckRepair.fixUnassigned(admin, hri);
2241       HBaseFsckRepair.waitUntilAssigned(admin, hri);
2242 
2243       // also assign replicas if needed (do it only when this call operates on a primary replica)
2244       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
2245       int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
2246       for (int i = 1; i < replicationCount; i++) {
2247         hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
2248         HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2249         if (h != null) {
2250           undeployRegions(h);
2251           //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
2252           //in consistency checks
2253           h.setSkipChecks(true);
2254         }
2255         HBaseFsckRepair.fixUnassigned(admin, hri);
2256         HBaseFsckRepair.waitUntilAssigned(admin, hri);
2257       }
2258 
2259     }
2260   }
2261 
2262   /**
2263    * Check a single region for consistency and correct deployment.
2264    */
2265   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2266   throws IOException, KeeperException, InterruptedException {
2267 
2268 	if (hbi.isSkipChecks()) return;
2269 	String descriptiveName = hbi.toString();
2270     boolean inMeta = hbi.metaEntry != null;
2271     // In case not checking HDFS, assume the region is on HDFS
2272     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2273     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2274     boolean isDeployed = !hbi.deployedOn.isEmpty();
2275     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2276     boolean deploymentMatchesMeta =
2277       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2278       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2279     boolean splitParent =
2280       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2281     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
2282     boolean recentlyModified = inHdfs &&
2283       hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2284 
2285     // ========== First the healthy cases =============
2286     if (hbi.containsOnlyHdfsEdits()) {
2287       return;
2288     }
2289     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2290       return;
2291     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2292       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2293         "tabled that is not deployed");
2294       return;
2295     } else if (recentlyModified) {
2296       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2297       return;
2298     }
2299     // ========== Cases where the region is not in hbase:meta =============
2300     else if (!inMeta && !inHdfs && !isDeployed) {
2301       // We shouldn't have record of this region at all then!
2302       assert false : "Entry for region with no data";
2303     } else if (!inMeta && !inHdfs && isDeployed) {
2304       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2305           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2306           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2307       if (shouldFixAssignments()) {
2308         undeployRegions(hbi);
2309       }
2310 
2311     } else if (!inMeta && inHdfs && !isDeployed) {
2312       if (hbi.isMerged()) {
2313         // This region has already been merged, the remaining hdfs file will be
2314         // cleaned by CatalogJanitor later
2315         hbi.setSkipChecks(true);
2316         LOG.info("Region " + descriptiveName
2317             + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2318         return;
2319       }
2320       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2321           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2322           "or deployed on any region server");
2323       // restore region consistency of an adopted orphan
2324       if (shouldFixMeta()) {
2325         if (!hbi.isHdfsRegioninfoPresent()) {
2326           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2327               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2328               " used.");
2329           return;
2330         }
2331 
2332         HRegionInfo hri = hbi.getHdfsHRI();
2333         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2334 
2335         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2336           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2337               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2338                 hri.getEndKey()) >= 0)
2339               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2340             if(region.isSplit() || region.isOffline()) continue;
2341             Path regionDir = hbi.getHdfsRegionDir();
2342             FileSystem fs = regionDir.getFileSystem(getConf());
2343             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2344             for (Path familyDir : familyDirs) {
2345               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2346               for (Path referenceFilePath : referenceFilePaths) {
2347                 Path parentRegionDir =
2348                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2349                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2350                   LOG.warn(hri + " start and stop keys are in the range of " + region
2351                       + ". The region might not be cleaned up from hdfs when region " + region
2352                       + " split failed. Hence deleting from hdfs.");
2353                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2354                     regionDir.getParent(), hri);
2355                   return;
2356                 }
2357               }
2358             }
2359           }
2360         }
2361 
2362         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2363         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2364         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2365             admin.getClusterStatus().getServers(), numReplicas);
2366 
2367         tryAssignmentRepair(hbi, "Trying to reassign region...");
2368       }
2369 
2370     } else if (!inMeta && inHdfs && isDeployed) {
2371       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2372           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2373       debugLsr(hbi.getHdfsRegionDir());
2374       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2375         // for replicas, this means that we should undeploy the region (we would have
2376         // gone over the primaries and fixed meta holes in first phase under
2377         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2378         // this stage unless unwanted replica)
2379         if (shouldFixAssignments()) {
2380           undeployRegionsForHbi(hbi);
2381         }
2382       }
2383       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2384         if (!hbi.isHdfsRegioninfoPresent()) {
2385           LOG.error("This should have been repaired in table integrity repair phase");
2386           return;
2387         }
2388 
2389         LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2390         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2391         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2392             admin.getClusterStatus().getServers(), numReplicas);
2393         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2394       }
2395 
2396     // ========== Cases where the region is in hbase:meta =============
2397     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2398       // check whether this is an actual error, or just transient state where parent
2399       // is not cleaned
2400       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2401         // check that split daughters are there
2402         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2403         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2404         if (infoA != null && infoB != null) {
2405           // we already processed or will process daughters. Move on, nothing to see here.
2406           hbi.setSkipChecks(true);
2407           return;
2408         }
2409       }
2410       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2411           + descriptiveName + " is a split parent in META, in HDFS, "
2412           + "and not deployed on any region server. This could be transient.");
2413       if (shouldFixSplitParents()) {
2414         setShouldRerun();
2415         resetSplitParent(hbi);
2416       }
2417     } else if (inMeta && !inHdfs && !isDeployed) {
2418       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2419           + descriptiveName + " found in META, but not in HDFS "
2420           + "or deployed on any region server.");
2421       if (shouldFixMeta()) {
2422         deleteMetaRegion(hbi);
2423       }
2424     } else if (inMeta && !inHdfs && isDeployed) {
2425       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2426           + " found in META, but not in HDFS, " +
2427           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2428       // We treat HDFS as ground truth.  Any information in meta is transient
2429       // and equivalent data can be regenerated.  So, lets unassign and remove
2430       // these problems from META.
2431       if (shouldFixAssignments()) {
2432         errors.print("Trying to fix unassigned region...");
2433         undeployRegions(hbi);
2434       }
2435       if (shouldFixMeta()) {
2436         // wait for it to complete
2437         deleteMetaRegion(hbi);
2438       }
2439     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2440       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2441           + " not deployed on any region server.");
2442       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2443     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2444       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2445           "Region " + descriptiveName + " should not be deployed according " +
2446           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2447       if (shouldFixAssignments()) {
2448         errors.print("Trying to close the region " + descriptiveName);
2449         setShouldRerun();
2450         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2451       }
2452     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2453       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2454           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2455           + " but is multiply assigned to region servers " +
2456           Joiner.on(", ").join(hbi.deployedOn));
2457       // If we are trying to fix the errors
2458       if (shouldFixAssignments()) {
2459         errors.print("Trying to fix assignment error...");
2460         setShouldRerun();
2461         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2462       }
2463     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2464       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2465           + descriptiveName + " listed in hbase:meta on region server " +
2466           hbi.metaEntry.regionServer + " but found on region server " +
2467           hbi.deployedOn.get(0));
2468       // If we are trying to fix the errors
2469       if (shouldFixAssignments()) {
2470         errors.print("Trying to fix assignment error...");
2471         setShouldRerun();
2472         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2473         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2474       }
2475     } else {
2476       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2477           " is in an unforeseen state:" +
2478           " inMeta=" + inMeta +
2479           " inHdfs=" + inHdfs +
2480           " isDeployed=" + isDeployed +
2481           " isMultiplyDeployed=" + isMultiplyDeployed +
2482           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2483           " shouldBeDeployed=" + shouldBeDeployed);
2484     }
2485   }
2486 
2487   /**
2488    * Checks tables integrity. Goes over all regions and scans the tables.
2489    * Collects all the pieces for each table and checks if there are missing,
2490    * repeated or overlapping ones.
2491    * @throws IOException
2492    */
2493   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2494     tablesInfo = new TreeMap<TableName,TableInfo> ();
2495     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2496     for (HbckInfo hbi : regionInfoMap.values()) {
2497       // Check only valid, working regions
2498       if (hbi.metaEntry == null) {
2499         // this assumes that consistency check has run loadMetaEntry
2500         Path p = hbi.getHdfsRegionDir();
2501         if (p == null) {
2502           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2503         }
2504 
2505         // TODO test.
2506         continue;
2507       }
2508       if (hbi.metaEntry.regionServer == null) {
2509         errors.detail("Skipping region because no region server: " + hbi);
2510         continue;
2511       }
2512       if (hbi.metaEntry.isOffline()) {
2513         errors.detail("Skipping region because it is offline: " + hbi);
2514         continue;
2515       }
2516       if (hbi.containsOnlyHdfsEdits()) {
2517         errors.detail("Skipping region because it only contains edits" + hbi);
2518         continue;
2519       }
2520 
2521       // Missing regionDir or over-deployment is checked elsewhere. Include
2522       // these cases in modTInfo, so we can evaluate those regions as part of
2523       // the region chain in META
2524       //if (hbi.foundRegionDir == null) continue;
2525       //if (hbi.deployedOn.size() != 1) continue;
2526       if (hbi.deployedOn.size() == 0) continue;
2527 
2528       // We should be safe here
2529       TableName tableName = hbi.metaEntry.getTable();
2530       TableInfo modTInfo = tablesInfo.get(tableName);
2531       if (modTInfo == null) {
2532         modTInfo = new TableInfo(tableName);
2533       }
2534       for (ServerName server : hbi.deployedOn) {
2535         modTInfo.addServer(server);
2536       }
2537 
2538       if (!hbi.isSkipChecks()) {
2539         modTInfo.addRegionInfo(hbi);
2540       }
2541 
2542       tablesInfo.put(tableName, modTInfo);
2543     }
2544 
2545     loadTableInfosForTablesWithNoRegion();
2546 
2547     logParallelMerge();
2548     for (TableInfo tInfo : tablesInfo.values()) {
2549       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2550       if (!tInfo.checkRegionChain(handler)) {
2551         errors.report("Found inconsistency in table " + tInfo.getName());
2552       }
2553     }
2554     return tablesInfo;
2555   }
2556 
2557   /** Loads table info's for tables that may not have been included, since there are no
2558    * regions reported for the table, but table dir is there in hdfs
2559    */
2560   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2561     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2562     for (HTableDescriptor htd : allTables.values()) {
2563       if (checkMetaOnly && !htd.isMetaTable()) {
2564         continue;
2565       }
2566 
2567       TableName tableName = htd.getTableName();
2568       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2569         TableInfo tableInfo = new TableInfo(tableName);
2570         tableInfo.htds.add(htd);
2571         tablesInfo.put(htd.getTableName(), tableInfo);
2572       }
2573     }
2574   }
2575 
2576   /**
2577    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2578    * @return number of file move fixes done to merge regions.
2579    */
2580   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2581     int fileMoves = 0;
2582     String thread = Thread.currentThread().getName();
2583     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2584     debugLsr(contained.getHdfsRegionDir());
2585 
2586     // rename the contained into the container.
2587     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2588     FileStatus[] dirs = null;
2589     try {
2590       dirs = fs.listStatus(contained.getHdfsRegionDir());
2591     } catch (FileNotFoundException fnfe) {
2592       // region we are attempting to merge in is not present!  Since this is a merge, there is
2593       // no harm skipping this region if it does not exist.
2594       if (!fs.exists(contained.getHdfsRegionDir())) {
2595         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2596             + " is missing. Assuming already sidelined or moved.");
2597       } else {
2598         sidelineRegionDir(fs, contained);
2599       }
2600       return fileMoves;
2601     }
2602 
2603     if (dirs == null) {
2604       if (!fs.exists(contained.getHdfsRegionDir())) {
2605         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2606             + " already sidelined.");
2607       } else {
2608         sidelineRegionDir(fs, contained);
2609       }
2610       return fileMoves;
2611     }
2612 
2613     for (FileStatus cf : dirs) {
2614       Path src = cf.getPath();
2615       Path dst =  new Path(targetRegionDir, src.getName());
2616 
2617       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2618         // do not copy the old .regioninfo file.
2619         continue;
2620       }
2621 
2622       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2623         // do not copy the .oldlogs files
2624         continue;
2625       }
2626 
2627       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2628       // FileSystem.rename is inconsistent with directories -- if the
2629       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2630       // it moves the src into the dst dir resulting in (foo/a/b).  If
2631       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2632       for (FileStatus hfile : fs.listStatus(src)) {
2633         boolean success = fs.rename(hfile.getPath(), dst);
2634         if (success) {
2635           fileMoves++;
2636         }
2637       }
2638       LOG.debug("[" + thread + "] Sideline directory contents:");
2639       debugLsr(targetRegionDir);
2640     }
2641 
2642     // if all success.
2643     sidelineRegionDir(fs, contained);
2644     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2645         getSidelineDir());
2646     debugLsr(contained.getHdfsRegionDir());
2647 
2648     return fileMoves;
2649   }
2650 
2651 
2652   static class WorkItemOverlapMerge implements Callable<Void> {
2653     private TableIntegrityErrorHandler handler;
2654     Collection<HbckInfo> overlapgroup;
2655 
2656     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2657       this.handler = handler;
2658       this.overlapgroup = overlapgroup;
2659     }
2660 
2661     @Override
2662     public Void call() throws Exception {
2663       handler.handleOverlapGroup(overlapgroup);
2664       return null;
2665     }
2666   };
2667 
2668 
2669   /**
2670    * Maintain information about a particular table.
2671    */
2672   public class TableInfo {
2673     TableName tableName;
2674     TreeSet <ServerName> deployedOn;
2675 
2676     // backwards regions
2677     final List<HbckInfo> backwards = new ArrayList<HbckInfo>();
2678 
2679     // sidelined big overlapped regions
2680     final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();
2681 
2682     // region split calculator
2683     final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);
2684 
2685     // Histogram of different HTableDescriptors found.  Ideally there is only one!
2686     final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();
2687 
2688     // key = start split, values = set of splits in problem group
2689     final Multimap<byte[], HbckInfo> overlapGroups =
2690       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
2691 
2692     // list of regions derived from meta entries.
2693     private ImmutableList<HRegionInfo> regionsFromMeta = null;
2694 
2695     TableInfo(TableName name) {
2696       this.tableName = name;
2697       deployedOn = new TreeSet <ServerName>();
2698     }
2699 
2700     /**
2701      * @return descriptor common to all regions.  null if are none or multiple!
2702      */
2703     private HTableDescriptor getHTD() {
2704       if (htds.size() == 1) {
2705         return (HTableDescriptor)htds.toArray()[0];
2706       } else {
2707         LOG.error("None/Multiple table descriptors found for table '"
2708           + tableName + "' regions: " + htds);
2709       }
2710       return null;
2711     }
2712 
2713     public void addRegionInfo(HbckInfo hir) {
2714       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2715         // end key is absolute end key, just add it.
2716         // ignore replicas other than primary for these checks
2717         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2718         return;
2719       }
2720 
2721       // if not the absolute end key, check for cycle
2722       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2723         errors.reportError(
2724             ERROR_CODE.REGION_CYCLE,
2725             String.format("The endkey for this region comes before the "
2726                 + "startkey, startkey=%s, endkey=%s",
2727                 Bytes.toStringBinary(hir.getStartKey()),
2728                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2729         backwards.add(hir);
2730         return;
2731       }
2732 
2733       // main case, add to split calculator
2734       // ignore replicas other than primary for these checks
2735       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2736     }
2737 
2738     public void addServer(ServerName server) {
2739       this.deployedOn.add(server);
2740     }
2741 
2742     public TableName getName() {
2743       return tableName;
2744     }
2745 
2746     public int getNumRegions() {
2747       return sc.getStarts().size() + backwards.size();
2748     }
2749 
2750     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2751       // lazy loaded, synchronized to ensure a single load
2752       if (regionsFromMeta == null) {
2753         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2754         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2755           if (tableName.equals(h.getTableName())) {
2756             if (h.metaEntry != null) {
2757               regions.add((HRegionInfo) h.metaEntry);
2758             }
2759           }
2760         }
2761         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2762       }
2763       
2764       return regionsFromMeta;
2765     }
2766     
2767 
2768       private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
2769       ErrorReporter errors;
2770 
2771       IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
2772         this.errors = errors;
2773         setTableInfo(ti);
2774       }
2775 
2776       @Override
2777       public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
2778         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2779             "First region should start with an empty key.  You need to "
2780             + " create a new region and regioninfo in HDFS to plug the hole.",
2781             getTableInfo(), hi);
2782       }
2783 
2784       @Override
2785       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2786         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2787             "Last region should end with an empty key. You need to "
2788                 + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
2789       }
2790 
2791       @Override
2792       public void handleDegenerateRegion(HbckInfo hi) throws IOException{
2793         errors.reportError(ERROR_CODE.DEGENERATE_REGION,
2794             "Region has the same start and end key.", getTableInfo(), hi);
2795       }
2796 
2797       @Override
2798       public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
2799         byte[] key = r1.getStartKey();
2800         // dup start key
2801         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2802             "Multiple regions have the same startkey: "
2803             + Bytes.toStringBinary(key), getTableInfo(), r1);
2804         errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
2805             "Multiple regions have the same startkey: "
2806             + Bytes.toStringBinary(key), getTableInfo(), r2);
2807       }
2808 
2809       @Override
2810       public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
2811         errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
2812             "There is an overlap in the region chain.",
2813             getTableInfo(), hi1, hi2);
2814       }
2815 
2816       @Override
2817       public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
2818         errors.reportError(
2819             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2820             "There is a hole in the region chain between "
2821                 + Bytes.toStringBinary(holeStart) + " and "
2822                 + Bytes.toStringBinary(holeStop)
2823                 + ".  You need to create a new .regioninfo and region "
2824                 + "dir in hdfs to plug the hole.");
2825       }
2826     };
2827 
2828     /**
2829      * This handler fixes integrity errors from hdfs information.  There are
2830      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2831      * 3) invalid regions.
2832      *
2833      * This class overrides methods that fix holes and the overlap group case.
2834      * Individual cases of particular overlaps are handled by the general
2835      * overlap group merge repair case.
2836      *
2837      * If hbase is online, this forces regions offline before doing merge
2838      * operations.
2839      */
2840     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2841       Configuration conf;
2842 
2843       boolean fixOverlaps = true;
2844 
2845       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2846           boolean fixHoles, boolean fixOverlaps) {
2847         super(ti, errors);
2848         this.conf = conf;
2849         this.fixOverlaps = fixOverlaps;
2850         // TODO properly use fixHoles
2851       }
2852 
2853       /**
2854        * This is a special case hole -- when the first region of a table is
2855        * missing from META, HBase doesn't acknowledge the existance of the
2856        * table.
2857        */
2858       @Override
2859       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2860         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2861             "First region should start with an empty key.  Creating a new " +
2862             "region and regioninfo in HDFS to plug the hole.",
2863             getTableInfo(), next);
2864         HTableDescriptor htd = getTableInfo().getHTD();
2865         // from special EMPTY_START_ROW to next region's startKey
2866         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2867             HConstants.EMPTY_START_ROW, next.getStartKey());
2868 
2869         // TODO test
2870         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2871         LOG.info("Table region start key was not empty.  Created new empty region: "
2872             + newRegion + " " +region);
2873         fixes++;
2874       }
2875 
2876       @Override
2877       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2878         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2879             "Last region should end with an empty key.  Creating a new "
2880                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2881         HTableDescriptor htd = getTableInfo().getHTD();
2882         // from curEndKey to EMPTY_START_ROW
2883         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2884             HConstants.EMPTY_START_ROW);
2885 
2886         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2887         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2888             + " " + region);
2889         fixes++;
2890       }
2891 
2892       /**
2893        * There is a hole in the hdfs regions that violates the table integrity
2894        * rules.  Create a new empty region that patches the hole.
2895        */
2896       @Override
2897       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2898         errors.reportError(
2899             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2900             "There is a hole in the region chain between "
2901                 + Bytes.toStringBinary(holeStartKey) + " and "
2902                 + Bytes.toStringBinary(holeStopKey)
2903                 + ".  Creating a new regioninfo and region "
2904                 + "dir in hdfs to plug the hole.");
2905         HTableDescriptor htd = getTableInfo().getHTD();
2906         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2907         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2908         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2909         fixes++;
2910       }
2911 
2912       /**
2913        * This takes set of overlapping regions and merges them into a single
2914        * region.  This covers cases like degenerate regions, shared start key,
2915        * general overlaps, duplicate ranges, and partial overlapping regions.
2916        *
2917        * Cases:
2918        * - Clean regions that overlap
2919        * - Only .oldlogs regions (can't find start/stop range, or figure out)
2920        *
2921        * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
2922        */
2923       @Override
2924       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2925           throws IOException {
2926         Preconditions.checkNotNull(overlap);
2927         Preconditions.checkArgument(overlap.size() >0);
2928 
2929         if (!this.fixOverlaps) {
2930           LOG.warn("Not attempting to repair overlaps.");
2931           return;
2932         }
2933 
2934         if (overlap.size() > maxMerge) {
2935           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2936             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2937           if (sidelineBigOverlaps) {
2938             // we only sideline big overlapped groups that exceeds the max number of regions to merge
2939             sidelineBigOverlaps(overlap);
2940           }
2941           return;
2942         }
2943 
2944         mergeOverlaps(overlap);
2945       }
2946 
2947       void mergeOverlaps(Collection<HbckInfo> overlap)
2948           throws IOException {
2949         String thread = Thread.currentThread().getName();
2950         LOG.info("== [" + thread + "] Merging regions into one region: "
2951           + Joiner.on(",").join(overlap));
2952         // get the min / max range and close all concerned regions
2953         Pair<byte[], byte[]> range = null;
2954         for (HbckInfo hi : overlap) {
2955           if (range == null) {
2956             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
2957           } else {
2958             if (RegionSplitCalculator.BYTES_COMPARATOR
2959                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
2960               range.setFirst(hi.getStartKey());
2961             }
2962             if (RegionSplitCalculator.BYTES_COMPARATOR
2963                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
2964               range.setSecond(hi.getEndKey());
2965             }
2966           }
2967           // need to close files so delete can happen.
2968           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
2969           LOG.debug("[" + thread + "] Contained region dir before close");
2970           debugLsr(hi.getHdfsRegionDir());
2971           try {
2972             LOG.info("[" + thread + "] Closing region: " + hi);
2973             closeRegion(hi);
2974           } catch (IOException ioe) {
2975             LOG.warn("[" + thread + "] Was unable to close region " + hi
2976               + ".  Just continuing... ", ioe);
2977           } catch (InterruptedException e) {
2978             LOG.warn("[" + thread + "] Was unable to close region " + hi
2979               + ".  Just continuing... ", e);
2980           }
2981 
2982           try {
2983             LOG.info("[" + thread + "] Offlining region: " + hi);
2984             offline(hi.getRegionName());
2985           } catch (IOException ioe) {
2986             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
2987               + ".  Just continuing... ", ioe);
2988           }
2989         }
2990 
2991         // create new empty container region.
2992         HTableDescriptor htd = getTableInfo().getHTD();
2993         // from start key to end Key
2994         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
2995             range.getSecond());
2996         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2997         LOG.info("[" + thread + "] Created new empty container region: " +
2998             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
2999         debugLsr(region.getRegionFileSystem().getRegionDir());
3000 
3001         // all target regions are closed, should be able to safely cleanup.
3002         boolean didFix= false;
3003         Path target = region.getRegionFileSystem().getRegionDir();
3004         for (HbckInfo contained : overlap) {
3005           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
3006           int merges = mergeRegionDirs(target, contained);
3007           if (merges > 0) {
3008             didFix = true;
3009           }
3010         }
3011         if (didFix) {
3012           fixes++;
3013         }
3014       }
3015 
3016       /**
3017        * Sideline some regions in a big overlap group so that it
3018        * will have fewer regions, and it is easier to merge them later on.
3019        *
3020        * @param bigOverlap the overlapped group with regions more than maxMerge
3021        * @throws IOException
3022        */
3023       void sidelineBigOverlaps(
3024           Collection<HbckInfo> bigOverlap) throws IOException {
3025         int overlapsToSideline = bigOverlap.size() - maxMerge;
3026         if (overlapsToSideline > maxOverlapsToSideline) {
3027           overlapsToSideline = maxOverlapsToSideline;
3028         }
3029         List<HbckInfo> regionsToSideline =
3030           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3031         FileSystem fs = FileSystem.get(conf);
3032         for (HbckInfo regionToSideline: regionsToSideline) {
3033           try {
3034             LOG.info("Closing region: " + regionToSideline);
3035             closeRegion(regionToSideline);
3036           } catch (IOException ioe) {
3037             LOG.warn("Was unable to close region " + regionToSideline
3038               + ".  Just continuing... ", ioe);
3039           } catch (InterruptedException e) {
3040             LOG.warn("Was unable to close region " + regionToSideline
3041               + ".  Just continuing... ", e);
3042           }
3043 
3044           try {
3045             LOG.info("Offlining region: " + regionToSideline);
3046             offline(regionToSideline.getRegionName());
3047           } catch (IOException ioe) {
3048             LOG.warn("Unable to offline region from master: " + regionToSideline
3049               + ".  Just continuing... ", ioe);
3050           }
3051 
3052           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3053           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3054           if (sidelineRegionDir != null) {
3055             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3056             LOG.info("After sidelined big overlapped region: "
3057               + regionToSideline.getRegionNameAsString()
3058               + " to " + sidelineRegionDir.toString());
3059             fixes++;
3060           }
3061         }
3062       }
3063     }
3064 
3065     /**
3066      * Check the region chain (from META) of this table.  We are looking for
3067      * holes, overlaps, and cycles.
3068      * @return false if there are errors
3069      * @throws IOException
3070      */
3071     public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
3072       // When table is disabled no need to check for the region chain. Some of the regions
3073       // accidently if deployed, this below code might report some issues like missing start
3074       // or end regions or region hole in chain and may try to fix which is unwanted.
3075       if (disabledTables.contains(this.tableName)) {
3076         return true;
3077       }
3078       int originalErrorsCount = errors.getErrorList().size();
3079       Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
3080       SortedSet<byte[]> splits = sc.getSplits();
3081 
3082       byte[] prevKey = null;
3083       byte[] problemKey = null;
3084 
3085       if (splits.size() == 0) {
3086         // no region for this table
3087         handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
3088       }
3089 
3090       for (byte[] key : splits) {
3091         Collection<HbckInfo> ranges = regions.get(key);
3092         if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
3093           for (HbckInfo rng : ranges) {
3094             handler.handleRegionStartKeyNotEmpty(rng);
3095           }
3096         }
3097 
3098         // check for degenerate ranges
3099         for (HbckInfo rng : ranges) {
3100           // special endkey case converts '' to null
3101           byte[] endKey = rng.getEndKey();
3102           endKey = (endKey.length == 0) ? null : endKey;
3103           if (Bytes.equals(rng.getStartKey(),endKey)) {
3104             handler.handleDegenerateRegion(rng);
3105           }
3106         }
3107 
3108         if (ranges.size() == 1) {
3109           // this split key is ok -- no overlap, not a hole.
3110           if (problemKey != null) {
3111             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3112           }
3113           problemKey = null; // fell through, no more problem.
3114         } else if (ranges.size() > 1) {
3115           // set the new problem key group name, if already have problem key, just
3116           // keep using it.
3117           if (problemKey == null) {
3118             // only for overlap regions.
3119             LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
3120             problemKey = key;
3121           }
3122           overlapGroups.putAll(problemKey, ranges);
3123 
3124           // record errors
3125           ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
3126           //  this dumb and n^2 but this shouldn't happen often
3127           for (HbckInfo r1 : ranges) {
3128             if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3129             subRange.remove(r1);
3130             for (HbckInfo r2 : subRange) {
3131               if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
3132               if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
3133                 handler.handleDuplicateStartKeys(r1,r2);
3134               } else {
3135                 // overlap
3136                 handler.handleOverlapInRegionChain(r1, r2);
3137               }
3138             }
3139           }
3140 
3141         } else if (ranges.size() == 0) {
3142           if (problemKey != null) {
3143             LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
3144           }
3145           problemKey = null;
3146 
3147           byte[] holeStopKey = sc.getSplits().higher(key);
3148           // if higher key is null we reached the top.
3149           if (holeStopKey != null) {
3150             // hole
3151             handler.handleHoleInRegionChain(key, holeStopKey);
3152           }
3153         }
3154         prevKey = key;
3155       }
3156 
3157       // When the last region of a table is proper and having an empty end key, 'prevKey'
3158       // will be null.
3159       if (prevKey != null) {
3160         handler.handleRegionEndKeyNotEmpty(prevKey);
3161       }
3162 
3163       // TODO fold this into the TableIntegrityHandler
3164       if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
3165         boolean ok = handleOverlapsParallel(handler, prevKey);
3166         if (!ok) {
3167           return false;
3168         }
3169       } else {
3170         for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3171           handler.handleOverlapGroup(overlap);
3172         }
3173       }
3174 
3175       if (details) {
3176         // do full region split map dump
3177         errors.print("---- Table '"  +  this.tableName
3178             + "': region split map");
3179         dump(splits, regions);
3180         errors.print("---- Table '"  +  this.tableName
3181             + "': overlap groups");
3182         dumpOverlapProblems(overlapGroups);
3183         errors.print("There are " + overlapGroups.keySet().size()
3184             + " overlap groups with " + overlapGroups.size()
3185             + " overlapping regions");
3186       }
3187       if (!sidelinedRegions.isEmpty()) {
3188         LOG.warn("Sidelined big overlapped regions, please bulk load them!");
3189         errors.print("---- Table '"  +  this.tableName
3190             + "': sidelined big overlapped regions");
3191         dumpSidelinedRegions(sidelinedRegions);
3192       }
3193       return errors.getErrorList().size() == originalErrorsCount;
3194     }
3195 
3196     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3197         throws IOException {
3198       // we parallelize overlap handler for the case we have lots of groups to fix.  We can
3199       // safely assume each group is independent.
3200       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3201       List<Future<Void>> rets;
3202       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3203         //
3204         merges.add(new WorkItemOverlapMerge(overlap, handler));
3205       }
3206       try {
3207         rets = executor.invokeAll(merges);
3208       } catch (InterruptedException e) {
3209         LOG.error("Overlap merges were interrupted", e);
3210         return false;
3211       }
3212       for(int i=0; i<merges.size(); i++) {
3213         WorkItemOverlapMerge work = merges.get(i);
3214         Future<Void> f = rets.get(i);
3215         try {
3216           f.get();
3217         } catch(ExecutionException e) {
3218           LOG.warn("Failed to merge overlap group" + work, e.getCause());
3219         } catch (InterruptedException e) {
3220           LOG.error("Waiting for overlap merges was interrupted", e);
3221           return false;
3222         }
3223       }
3224       return true;
3225     }
3226 
3227     /**
3228      * This dumps data in a visually reasonable way for visual debugging
3229      *
3230      * @param splits
3231      * @param regions
3232      */
3233     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3234       // we display this way because the last end key should be displayed as well.
3235       StringBuilder sb = new StringBuilder();
3236       for (byte[] k : splits) {
3237         sb.setLength(0); // clear out existing buffer, if any.
3238         sb.append(Bytes.toStringBinary(k) + ":\t");
3239         for (HbckInfo r : regions.get(k)) {
3240           sb.append("[ "+ r.toString() + ", "
3241               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3242         }
3243         errors.print(sb.toString());
3244       }
3245     }
3246   }
3247 
3248   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3249     // we display this way because the last end key should be displayed as
3250     // well.
3251     for (byte[] k : regions.keySet()) {
3252       errors.print(Bytes.toStringBinary(k) + ":");
3253       for (HbckInfo r : regions.get(k)) {
3254         errors.print("[ " + r.toString() + ", "
3255             + Bytes.toStringBinary(r.getEndKey()) + "]");
3256       }
3257       errors.print("----");
3258     }
3259   }
3260 
3261   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3262     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3263       TableName tableName = entry.getValue().getTableName();
3264       Path path = entry.getKey();
3265       errors.print("This sidelined region dir should be bulk loaded: "
3266         + path.toString());
3267       errors.print("Bulk load command looks like: "
3268         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3269         + path.toUri().getPath() + " "+ tableName);
3270     }
3271   }
3272 
3273   public Multimap<byte[], HbckInfo> getOverlapGroups(
3274       TableName table) {
3275     TableInfo ti = tablesInfo.get(table);
3276     return ti.overlapGroups;
3277   }
3278 
3279   /**
3280    * Return a list of user-space table names whose metadata have not been
3281    * modified in the last few milliseconds specified by timelag
3282    * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3283    * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
3284    * milliseconds specified by timelag, then the table is a candidate to be returned.
3285    * @return tables that have not been modified recently
3286    * @throws IOException if an error is encountered
3287    */
3288   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3289     List<TableName> tableNames = new ArrayList<TableName>();
3290     long now = EnvironmentEdgeManager.currentTime();
3291 
3292     for (HbckInfo hbi : regionInfoMap.values()) {
3293       MetaEntry info = hbi.metaEntry;
3294 
3295       // if the start key is zero, then we have found the first region of a table.
3296       // pick only those tables that were not modified in the last few milliseconds.
3297       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3298         if (info.modTime + timelag < now) {
3299           tableNames.add(info.getTable());
3300         } else {
3301           numSkipped.incrementAndGet(); // one more in-flux table
3302         }
3303       }
3304     }
3305     return getHTableDescriptors(tableNames);
3306   }
3307 
3308   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3309     HTableDescriptor[] htd = new HTableDescriptor[0];
3310     Admin admin = null;
3311     try {
3312       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3313       admin = new HBaseAdmin(getConf());
3314       htd = admin.getTableDescriptorsByTableName(tableNames);
3315     } catch (IOException e) {
3316       LOG.debug("Exception getting table descriptors", e);
3317     } finally {
3318       if (admin != null) {
3319         try {
3320           admin.close();
3321         } catch (IOException e) {
3322           LOG.debug("Exception closing HBaseAdmin", e);
3323         }
3324       }
3325     }
3326     return htd;
3327   }
3328 
3329   /**
3330    * Gets the entry in regionInfo corresponding to the the given encoded
3331    * region name. If the region has not been seen yet, a new entry is added
3332    * and returned.
3333    */
3334   private synchronized HbckInfo getOrCreateInfo(String name) {
3335     HbckInfo hbi = regionInfoMap.get(name);
3336     if (hbi == null) {
3337       hbi = new HbckInfo(null);
3338       regionInfoMap.put(name, hbi);
3339     }
3340     return hbi;
3341   }
3342 
3343   private void checkAndFixTableLocks() throws IOException {
3344     TableLockChecker checker = new TableLockChecker(zkw, errors);
3345     checker.checkTableLocks();
3346 
3347     if (this.fixTableLocks) {
3348       checker.fixExpiredTableLocks();
3349     }
3350   }
3351 
3352   /**
3353    * Check whether a orphaned table ZNode exists and fix it if requested.
3354    * @throws IOException
3355    * @throws KeeperException
3356    * @throws InterruptedException
3357    */
3358   private void checkAndFixOrphanedTableZNodes()
3359       throws IOException, KeeperException, InterruptedException {
3360     Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
3361     String msg;
3362     TableInfo tableInfo;
3363 
3364     for (TableName tableName : enablingTables) {
3365       // Check whether the table exists in hbase
3366       tableInfo = tablesInfo.get(tableName);
3367       if (tableInfo != null) {
3368         // Table exists.  This table state is in transit.  No problem for this table.
3369         continue;
3370       }
3371 
3372       msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
3373       LOG.warn(msg);
3374       orphanedTableZNodes.add(tableName);
3375       errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
3376     }
3377 
3378     if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
3379       ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
3380 
3381       for (TableName tableName : orphanedTableZNodes) {
3382         try {
3383           // Set the table state to be disabled so that if we made mistake, we can trace
3384           // the history and figure it out.
3385           // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
3386           // Both approaches works.
3387           zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
3388         } catch (CoordinatedStateException e) {
3389           // This exception should not happen here
3390           LOG.error(
3391             "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
3392             e);
3393         }
3394       }
3395     }
3396   }
3397 
3398   /**
3399     * Check values in regionInfo for hbase:meta
3400     * Check if zero or more than one regions with hbase:meta are found.
3401     * If there are inconsistencies (i.e. zero or more than one regions
3402     * pretend to be holding the hbase:meta) try to fix that and report an error.
3403     * @throws IOException from HBaseFsckRepair functions
3404     * @throws KeeperException
3405     * @throws InterruptedException
3406     */
3407   boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
3408     Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
3409     for (HbckInfo value : regionInfoMap.values()) {
3410       if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
3411         metaRegions.put(value.getReplicaId(), value);
3412       }
3413     }
3414     int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
3415         .getRegionReplication();
3416     boolean noProblem = true;
3417     // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
3418     // Check the deployed servers. It should be exactly one server for each replica.
3419     for (int i = 0; i < metaReplication; i++) {
3420       HbckInfo metaHbckInfo = metaRegions.remove(i);
3421       List<ServerName> servers = new ArrayList<ServerName>();
3422       if (metaHbckInfo != null) {
3423         servers = metaHbckInfo.deployedOn;
3424       }
3425       if (servers.size() != 1) {
3426         noProblem = false;
3427         if (servers.size() == 0) {
3428           assignMetaReplica(i);
3429         } else if (servers.size() > 1) {
3430           errors
3431           .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
3432                        metaHbckInfo.getReplicaId() + " is found on more than one region.");
3433           if (shouldFixAssignments()) {
3434             errors.print("Trying to fix a problem with hbase:meta, replicaId " +
3435                          metaHbckInfo.getReplicaId() +"..");
3436             setShouldRerun();
3437             // try fix it (treat is a dupe assignment)
3438             HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
3439           }
3440         }
3441       }
3442     }
3443     // unassign whatever is remaining in metaRegions. They are excess replicas.
3444     for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
3445       noProblem = false;
3446       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
3447           "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
3448           ", deployed " + metaRegions.size());
3449       if (shouldFixAssignments()) {
3450         errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
3451             " of hbase:meta..");
3452         setShouldRerun();
3453         unassignMetaReplica(entry.getValue());
3454       }
3455     }
3456     // if noProblem is false, rerun hbck with hopefully fixed META
3457     // if noProblem is true, no errors, so continue normally
3458     return noProblem;
3459   }
3460 
3461   private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
3462   KeeperException {
3463     undeployRegions(hi);
3464     ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
3465   }
3466 
3467   private void assignMetaReplica(int replicaId)
3468       throws IOException, KeeperException, InterruptedException {
3469     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3470         replicaId +" is not found on any region.");
3471     if (shouldFixAssignments()) {
3472       errors.print("Trying to fix a problem with hbase:meta..");
3473       setShouldRerun();
3474       // try to fix it (treat it as unassigned region)
3475       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3476           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3477       HBaseFsckRepair.fixUnassigned(admin, h);
3478       HBaseFsckRepair.waitUntilAssigned(admin, h);
3479     }
3480   }
3481 
3482   /**
3483    * Scan hbase:meta, adding all regions found to the regionInfo map.
3484    * @throws IOException if an error is encountered
3485    */
3486   boolean loadMetaEntries() throws IOException {
3487     MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
3488       int countRecord = 1;
3489 
3490       // comparator to sort KeyValues with latest modtime
3491       final Comparator<Cell> comp = new Comparator<Cell>() {
3492         @Override
3493         public int compare(Cell k1, Cell k2) {
3494           return (int)(k1.getTimestamp() - k2.getTimestamp());
3495         }
3496       };
3497 
3498       @Override
3499       public boolean processRow(Result result) throws IOException {
3500         try {
3501 
3502           // record the latest modification of this META record
3503           long ts =  Collections.max(result.listCells(), comp).getTimestamp();
3504           RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
3505           if (rl == null) {
3506             emptyRegionInfoQualifiers.add(result);
3507             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3508               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3509             return true;
3510           }
3511           ServerName sn = null;
3512           if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
3513               rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
3514             emptyRegionInfoQualifiers.add(result);
3515             errors.reportError(ERROR_CODE.EMPTY_META_CELL,
3516               "Empty REGIONINFO_QUALIFIER found in hbase:meta");
3517             return true;
3518           }
3519           HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
3520           if (!(isTableIncluded(hri.getTable())
3521               || hri.isMetaRegion())) {
3522             return true;
3523           }
3524           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
3525           for (HRegionLocation h : rl.getRegionLocations()) {
3526             if (h == null || h.getRegionInfo() == null) {
3527               continue;
3528             }
3529             sn = h.getServerName();
3530             hri = h.getRegionInfo();
3531 
3532             MetaEntry m = null;
3533             if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
3534               m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
3535             } else {
3536               m = new MetaEntry(hri, sn, ts, null, null);
3537             }
3538             HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
3539             if (previous == null) {
3540               regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
3541             } else if (previous.metaEntry == null) {
3542               previous.metaEntry = m;
3543             } else {
3544               throw new IOException("Two entries in hbase:meta are same " + previous);
3545             }
3546           }
3547           PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
3548           for (HRegionInfo mergeRegion : new HRegionInfo[] {
3549               mergeRegions.getFirst(), mergeRegions.getSecond() }) {
3550             if (mergeRegion != null) {
3551               // This region is already been merged
3552               HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
3553               hbInfo.setMerged(true);
3554             }
3555           }
3556 
3557           // show proof of progress to the user, once for every 100 records.
3558           if (countRecord % 100 == 0) {
3559             errors.progress();
3560           }
3561           countRecord++;
3562           return true;
3563         } catch (RuntimeException e) {
3564           LOG.error("Result=" + result);
3565           throw e;
3566         }
3567       }
3568     };
3569     if (!checkMetaOnly) {
3570       // Scan hbase:meta to pick up user regions
3571       MetaScanner.metaScan(connection, visitor);
3572     }
3573 
3574     errors.print("");
3575     return true;
3576   }
3577 
3578   /**
3579    * Stores the regioninfo entries scanned from META
3580    */
3581   static class MetaEntry extends HRegionInfo {
3582     ServerName regionServer;   // server hosting this region
3583     long modTime;          // timestamp of most recent modification metadata
3584     HRegionInfo splitA, splitB; //split daughters
3585 
3586     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3587       this(rinfo, regionServer, modTime, null, null);
3588     }
3589 
3590     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3591         HRegionInfo splitA, HRegionInfo splitB) {
3592       super(rinfo);
3593       this.regionServer = regionServer;
3594       this.modTime = modTime;
3595       this.splitA = splitA;
3596       this.splitB = splitB;
3597     }
3598 
3599     @Override
3600     public boolean equals(Object o) {
3601       boolean superEq = super.equals(o);
3602       if (!superEq) {
3603         return superEq;
3604       }
3605 
3606       MetaEntry me = (MetaEntry) o;
3607       if (!regionServer.equals(me.regionServer)) {
3608         return false;
3609       }
3610       return (modTime == me.modTime);
3611     }
3612 
3613     @Override
3614     public int hashCode() {
3615       int hash = Arrays.hashCode(getRegionName());
3616       hash ^= getRegionId();
3617       hash ^= Arrays.hashCode(getStartKey());
3618       hash ^= Arrays.hashCode(getEndKey());
3619       hash ^= Boolean.valueOf(isOffline()).hashCode();
3620       hash ^= getTable().hashCode();
3621       if (regionServer != null) {
3622         hash ^= regionServer.hashCode();
3623       }
3624       hash ^= modTime;
3625       return hash;
3626     }
3627   }
3628 
3629   /**
3630    * Stores the regioninfo entries from HDFS
3631    */
3632   static class HdfsEntry {
3633     HRegionInfo hri;
3634     Path hdfsRegionDir = null;
3635     long hdfsRegionDirModTime  = 0;
3636     boolean hdfsRegioninfoFilePresent = false;
3637     boolean hdfsOnlyEdits = false;
3638   }
3639 
3640   /**
3641    * Stores the regioninfo retrieved from Online region servers.
3642    */
3643   static class OnlineEntry {
3644     HRegionInfo hri;
3645     ServerName hsa;
3646 
3647     @Override
3648     public String toString() {
3649       return hsa.toString() + ";" + hri.getRegionNameAsString();
3650     }
3651   }
3652 
3653   /**
3654    * Maintain information about a particular region.  It gathers information
3655    * from three places -- HDFS, META, and region servers.
3656    */
3657   public static class HbckInfo implements KeyRange {
3658     private MetaEntry metaEntry = null; // info in META
3659     private HdfsEntry hdfsEntry = null; // info in HDFS
3660     private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
3661     private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
3662     private boolean skipChecks = false; // whether to skip further checks to this region info.
3663     private boolean isMerged = false;// whether this region has already been merged into another one
3664     private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
3665     private HRegionInfo primaryHRIForDeployedReplica = null;
3666 
3667     HbckInfo(MetaEntry metaEntry) {
3668       this.metaEntry = metaEntry;
3669     }
3670 
3671     public int getReplicaId() {
3672       if (metaEntry != null) return metaEntry.getReplicaId();
3673       return deployedReplicaId;
3674     }
3675 
3676     public synchronized void addServer(HRegionInfo hri, ServerName server) {
3677       OnlineEntry rse = new OnlineEntry() ;
3678       rse.hri = hri;
3679       rse.hsa = server;
3680       this.deployedEntries.add(rse);
3681       this.deployedOn.add(server);
3682       // save the replicaId that we see deployed in the cluster
3683       this.deployedReplicaId = hri.getReplicaId();
3684       this.primaryHRIForDeployedReplica =
3685           RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
3686     }
3687 
3688     @Override
3689     public synchronized String toString() {
3690       StringBuilder sb = new StringBuilder();
3691       sb.append("{ meta => ");
3692       sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
3693       sb.append( ", hdfs => " + getHdfsRegionDir());
3694       sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
3695       sb.append( ", replicaId => " + getReplicaId());
3696       sb.append(" }");
3697       return sb.toString();
3698     }
3699 
3700     @Override
3701     public byte[] getStartKey() {
3702       if (this.metaEntry != null) {
3703         return this.metaEntry.getStartKey();
3704       } else if (this.hdfsEntry != null) {
3705         return this.hdfsEntry.hri.getStartKey();
3706       } else {
3707         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3708         return null;
3709       }
3710     }
3711 
3712     @Override
3713     public byte[] getEndKey() {
3714       if (this.metaEntry != null) {
3715         return this.metaEntry.getEndKey();
3716       } else if (this.hdfsEntry != null) {
3717         return this.hdfsEntry.hri.getEndKey();
3718       } else {
3719         LOG.error("Entry " + this + " has no meta or hdfs region start key.");
3720         return null;
3721       }
3722     }
3723 
3724     public TableName getTableName() {
3725       if (this.metaEntry != null) {
3726         return this.metaEntry.getTable();
3727       } else if (this.hdfsEntry != null) {
3728         // we are only guaranteed to have a path and not an HRI for hdfsEntry,
3729         // so we get the name from the Path
3730         Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
3731         return FSUtils.getTableName(tableDir);
3732       } else {
3733         // return the info from the first online/deployed hri
3734         for (OnlineEntry e : deployedEntries) {
3735           return e.hri.getTable();
3736         }
3737         return null;
3738       }
3739     }
3740 
3741     public String getRegionNameAsString() {
3742       if (metaEntry != null) {
3743         return metaEntry.getRegionNameAsString();
3744       } else if (hdfsEntry != null) {
3745         if (hdfsEntry.hri != null) {
3746           return hdfsEntry.hri.getRegionNameAsString();
3747         }
3748       } else {
3749         // return the info from the first online/deployed hri
3750         for (OnlineEntry e : deployedEntries) {
3751           return e.hri.getRegionNameAsString();
3752         }
3753       }
3754       return null;
3755     }
3756 
3757     public byte[] getRegionName() {
3758       if (metaEntry != null) {
3759         return metaEntry.getRegionName();
3760       } else if (hdfsEntry != null) {
3761         return hdfsEntry.hri.getRegionName();
3762       } else {
3763         // return the info from the first online/deployed hri
3764         for (OnlineEntry e : deployedEntries) {
3765           return e.hri.getRegionName();
3766         }
3767         return null;
3768       }
3769     }
3770 
3771     public HRegionInfo getPrimaryHRIForDeployedReplica() {
3772       return primaryHRIForDeployedReplica;
3773     }
3774 
3775     Path getHdfsRegionDir() {
3776       if (hdfsEntry == null) {
3777         return null;
3778       }
3779       return hdfsEntry.hdfsRegionDir;
3780     }
3781 
3782     boolean containsOnlyHdfsEdits() {
3783       if (hdfsEntry == null) {
3784         return false;
3785       }
3786       return hdfsEntry.hdfsOnlyEdits;
3787     }
3788 
3789     boolean isHdfsRegioninfoPresent() {
3790       if (hdfsEntry == null) {
3791         return false;
3792       }
3793       return hdfsEntry.hdfsRegioninfoFilePresent;
3794     }
3795 
3796     long getModTime() {
3797       if (hdfsEntry == null) {
3798         return 0;
3799       }
3800       return hdfsEntry.hdfsRegionDirModTime;
3801     }
3802 
3803     HRegionInfo getHdfsHRI() {
3804       if (hdfsEntry == null) {
3805         return null;
3806       }
3807       return hdfsEntry.hri;
3808     }
3809 
3810     public void setSkipChecks(boolean skipChecks) {
3811       this.skipChecks = skipChecks;
3812     }
3813 
3814     public boolean isSkipChecks() {
3815       return skipChecks;
3816     }
3817 
3818     public void setMerged(boolean isMerged) {
3819       this.isMerged = isMerged;
3820     }
3821 
3822     public boolean isMerged() {
3823       return this.isMerged;
3824     }
3825   }
3826 
3827   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
3828     @Override
3829     public int compare(HbckInfo l, HbckInfo r) {
3830       if (l == r) {
3831         // same instance
3832         return 0;
3833       }
3834 
3835       int tableCompare = l.getTableName().compareTo(r.getTableName());
3836       if (tableCompare != 0) {
3837         return tableCompare;
3838       }
3839 
3840       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3841           l.getStartKey(), r.getStartKey());
3842       if (startComparison != 0) {
3843         return startComparison;
3844       }
3845 
3846       // Special case for absolute endkey
3847       byte[] endKey = r.getEndKey();
3848       endKey = (endKey.length == 0) ? null : endKey;
3849       byte[] endKey2 = l.getEndKey();
3850       endKey2 = (endKey2.length == 0) ? null : endKey2;
3851       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
3852           endKey2,  endKey);
3853 
3854       if (endComparison != 0) {
3855         return endComparison;
3856       }
3857 
3858       // use regionId as tiebreaker.
3859       // Null is considered after all possible values so make it bigger.
3860       if (l.hdfsEntry == null && r.hdfsEntry == null) {
3861         return 0;
3862       }
3863       if (l.hdfsEntry == null && r.hdfsEntry != null) {
3864         return 1;
3865       }
3866       // l.hdfsEntry must not be null
3867       if (r.hdfsEntry == null) {
3868         return -1;
3869       }
3870       // both l.hdfsEntry and r.hdfsEntry must not be null.
3871       return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
3872     }
3873   };
3874 
3875   /**
3876    * Prints summary of all tables found on the system.
3877    */
3878   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
3879     StringBuilder sb = new StringBuilder();
3880     int numOfSkippedRegions;
3881     errors.print("Summary:");
3882     for (TableInfo tInfo : tablesInfo.values()) {
3883       numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
3884           skippedRegions.get(tInfo.getName()).size() : 0;
3885 
3886       if (errors.tableHasErrors(tInfo)) {
3887         errors.print("Table " + tInfo.getName() + " is inconsistent.");
3888       } else if (numOfSkippedRegions > 0){
3889         errors.print("Table " + tInfo.getName() + " is okay (with "
3890           + numOfSkippedRegions + " skipped regions).");
3891       }
3892       else {
3893         errors.print("Table " + tInfo.getName() + " is okay.");
3894       }
3895       errors.print("    Number of regions: " + tInfo.getNumRegions());
3896       if (numOfSkippedRegions > 0) {
3897         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
3898         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
3899         System.out.println("      List of skipped regions:");
3900         for(String sr : skippedRegionStrings) {
3901           System.out.println("        " + sr);
3902         }
3903       }
3904       sb.setLength(0); // clear out existing buffer, if any.
3905       sb.append("    Deployed on: ");
3906       for (ServerName server : tInfo.deployedOn) {
3907         sb.append(" " + server.toString());
3908       }
3909       errors.print(sb.toString());
3910     }
3911   }
3912 
3913   static ErrorReporter getErrorReporter(
3914       final Configuration conf) throws ClassNotFoundException {
3915     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
3916     return ReflectionUtils.newInstance(reporter, conf);
3917   }
3918 
3919   public interface ErrorReporter {
3920     enum ERROR_CODE {
3921       UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
3922       NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
3923       MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
3924       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
3925       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
3926       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
3927       LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK,
3928       ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
3929     }
3930     void clear();
3931     void report(String message);
3932     void reportError(String message);
3933     void reportError(ERROR_CODE errorCode, String message);
3934     void reportError(ERROR_CODE errorCode, String message, TableInfo table);
3935     void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
3936     void reportError(
3937       ERROR_CODE errorCode,
3938       String message,
3939       TableInfo table,
3940       HbckInfo info1,
3941       HbckInfo info2
3942     );
3943     int summarize();
3944     void detail(String details);
3945     ArrayList<ERROR_CODE> getErrorList();
3946     void progress();
3947     void print(String message);
3948     void resetErrors();
3949     boolean tableHasErrors(TableInfo table);
3950   }
3951 
3952   static class PrintingErrorReporter implements ErrorReporter {
3953     public int errorCount = 0;
3954     private int showProgress;
3955     // How frequently calls to progress() will create output
3956     private static final int progressThreshold = 100;
3957 
3958     Set<TableInfo> errorTables = new HashSet<TableInfo>();
3959 
3960     // for use by unit tests to verify which errors were discovered
3961     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
3962 
3963     @Override
3964     public void clear() {
3965       errorTables.clear();
3966       errorList.clear();
3967       errorCount = 0;
3968     }
3969 
3970     @Override
3971     public synchronized void reportError(ERROR_CODE errorCode, String message) {
3972       if (errorCode == ERROR_CODE.WRONG_USAGE) {
3973         System.err.println(message);
3974         return;
3975       }
3976 
3977       errorList.add(errorCode);
3978       if (!summary) {
3979         System.out.println("ERROR: " + message);
3980       }
3981       errorCount++;
3982       showProgress = 0;
3983     }
3984 
3985     @Override
3986     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
3987       errorTables.add(table);
3988       reportError(errorCode, message);
3989     }
3990 
3991     @Override
3992     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
3993                                          HbckInfo info) {
3994       errorTables.add(table);
3995       String reference = "(region " + info.getRegionNameAsString() + ")";
3996       reportError(errorCode, reference + " " + message);
3997     }
3998 
3999     @Override
4000     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4001                                          HbckInfo info1, HbckInfo info2) {
4002       errorTables.add(table);
4003       String reference = "(regions " + info1.getRegionNameAsString()
4004           + " and " + info2.getRegionNameAsString() + ")";
4005       reportError(errorCode, reference + " " + message);
4006     }
4007 
4008     @Override
4009     public synchronized void reportError(String message) {
4010       reportError(ERROR_CODE.UNKNOWN, message);
4011     }
4012 
4013     /**
4014      * Report error information, but do not increment the error count.  Intended for cases
4015      * where the actual error would have been reported previously.
4016      * @param message
4017      */
4018     @Override
4019     public synchronized void report(String message) {
4020       if (! summary) {
4021         System.out.println("ERROR: " + message);
4022       }
4023       showProgress = 0;
4024     }
4025 
4026     @Override
4027     public synchronized int summarize() {
4028       System.out.println(Integer.toString(errorCount) +
4029                          " inconsistencies detected.");
4030       if (errorCount == 0) {
4031         System.out.println("Status: OK");
4032         return 0;
4033       } else {
4034         System.out.println("Status: INCONSISTENT");
4035         return -1;
4036       }
4037     }
4038 
4039     @Override
4040     public ArrayList<ERROR_CODE> getErrorList() {
4041       return errorList;
4042     }
4043 
4044     @Override
4045     public synchronized void print(String message) {
4046       if (!summary) {
4047         System.out.println(message);
4048       }
4049     }
4050 
4051     @Override
4052     public boolean tableHasErrors(TableInfo table) {
4053       return errorTables.contains(table);
4054     }
4055 
4056     @Override
4057     public void resetErrors() {
4058       errorCount = 0;
4059     }
4060 
4061     @Override
4062     public synchronized void detail(String message) {
4063       if (details) {
4064         System.out.println(message);
4065       }
4066       showProgress = 0;
4067     }
4068 
4069     @Override
4070     public synchronized void progress() {
4071       if (showProgress++ == progressThreshold) {
4072         if (!summary) {
4073           System.out.print(".");
4074         }
4075         showProgress = 0;
4076       }
4077     }
4078   }
4079 
4080   /**
4081    * Contact a region server and get all information from it
4082    */
4083   static class WorkItemRegion implements Callable<Void> {
4084     private HBaseFsck hbck;
4085     private ServerName rsinfo;
4086     private ErrorReporter errors;
4087     private HConnection connection;
4088 
4089     WorkItemRegion(HBaseFsck hbck, ServerName info,
4090                    ErrorReporter errors, HConnection connection) {
4091       this.hbck = hbck;
4092       this.rsinfo = info;
4093       this.errors = errors;
4094       this.connection = connection;
4095     }
4096 
4097     @Override
4098     public synchronized Void call() throws IOException {
4099       errors.progress();
4100       try {
4101         BlockingInterface server = connection.getAdmin(rsinfo);
4102 
4103         // list all online regions from this region server
4104         List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
4105         regions = filterRegions(regions);
4106 
4107         if (details) {
4108           errors.detail("RegionServer: " + rsinfo.getServerName() +
4109                            " number of regions: " + regions.size());
4110           for (HRegionInfo rinfo: regions) {
4111             errors.detail("  " + rinfo.getRegionNameAsString() +
4112                              " id: " + rinfo.getRegionId() +
4113                              " encoded_name: " + rinfo.getEncodedName() +
4114                              " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
4115                              " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
4116           }
4117         }
4118 
4119         // check to see if the existence of this region matches the region in META
4120         for (HRegionInfo r:regions) {
4121           HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
4122           hbi.addServer(r, rsinfo);
4123         }
4124       } catch (IOException e) {          // unable to connect to the region server.
4125         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
4126           " Unable to fetch region information. " + e);
4127         throw e;
4128       }
4129       return null;
4130     }
4131 
4132     private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
4133       List<HRegionInfo> ret = Lists.newArrayList();
4134       for (HRegionInfo hri : regions) {
4135         if (hri.isMetaTable() || (!hbck.checkMetaOnly
4136             && hbck.isTableIncluded(hri.getTable()))) {
4137           ret.add(hri);
4138         }
4139       }
4140       return ret;
4141     }
4142   }
4143 
4144   /**
4145    * Contact hdfs and get all information about specified table directory into
4146    * regioninfo list.
4147    */
4148   class WorkItemHdfsDir implements Callable<Void> {
4149     private FileStatus tableDir;
4150     private ErrorReporter errors;
4151     private FileSystem fs;
4152 
4153     WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4154                     FileStatus status) {
4155       this.fs = fs;
4156       this.tableDir = status;
4157       this.errors = errors;
4158     }
4159 
4160     @Override
4161     public synchronized Void call() throws InterruptedException, ExecutionException {
4162       final Vector<Exception> exceptions = new Vector<Exception>();
4163       try {
4164         // level 2: <HBASE_DIR>/<table>/*
4165         final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4166         final List<Future<?>> futures = new ArrayList<Future<?>>(regionDirs.length);
4167         for (final FileStatus regionDir : regionDirs) {
4168           errors.progress();
4169           final String encodedName = regionDir.getPath().getName();
4170           // ignore directories that aren't hexadecimal
4171           if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
4172             continue;
4173           }
4174 
4175           if (!exceptions.isEmpty()) {
4176             break;
4177           }
4178 
4179           futures.add(executor.submit(new Runnable() {
4180             @Override
4181             public void run() {
4182               try {
4183                 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
4184                 Path regioninfoFile = new Path(regionDir.getPath(),
4185                     HRegionFileSystem.REGION_INFO_FILE);
4186                 boolean regioninfoFileExists = fs.exists(regioninfoFile);
4187 
4188                 if (!regioninfoFileExists) {
4189                   // As tables become larger it is more and more likely that by the time you
4190                   // reach a given region that it will be gone due to region splits/merges.
4191                   if (!fs.exists(regionDir.getPath())) {
4192                     LOG.warn("By the time we tried to process this region dir it was already gone: "
4193                         + regionDir.getPath());
4194                     return;
4195                   }
4196                 }
4197                 HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4198                 HdfsEntry he = new HdfsEntry();
4199                 synchronized (hbi) {
4200                   if (hbi.getHdfsRegionDir() != null) {
4201                     errors.print("Directory " + encodedName + " duplicate??" +
4202                         hbi.getHdfsRegionDir());
4203                   }
4204 
4205                   he.hdfsRegionDir = regionDir.getPath();
4206                   he.hdfsRegionDirModTime = regionDir.getModificationTime();
4207                   he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4208                   // we add to orphan list when we attempt to read .regioninfo
4209 
4210                   // Set a flag if this region contains only edits
4211                   // This is special case if a region is left after split
4212                   he.hdfsOnlyEdits = true;
4213                   FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4214                   Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4215                   for (FileStatus subDir : subDirs) {
4216                     errors.progress();
4217                     String sdName = subDir.getPath().getName();
4218                     if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4219                       he.hdfsOnlyEdits = false;
4220                       break;
4221                     }
4222                   }
4223                   hbi.hdfsEntry = he;
4224                 }
4225               } catch (Exception e) {
4226                 LOG.error("Could not load region dir", e);
4227                 exceptions.add(e);
4228               }
4229             }
4230           }));
4231         }
4232         // Ensure all pending tasks are complete (or that we run into an exception)
4233         for (Future<?> f : futures) {
4234           if (!exceptions.isEmpty()) {
4235             break;
4236           }
4237           try {
4238             f.get();
4239           } catch (ExecutionException e) {
4240             LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4241             // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4242           };
4243         }
4244       } catch (IOException e) {
4245         LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4246         exceptions.add(e);
4247       } finally {
4248         if (!exceptions.isEmpty()) {
4249           errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4250               + tableDir.getPath().getName()
4251               + " Unable to fetch all HDFS region information. ");
4252           // Just throw the first exception as an indication something bad happened
4253           // Don't need to propagate all the exceptions, we already logged them all anyway
4254           throw new ExecutionException("First exception in WorkItemHdfsDir",
4255               exceptions.firstElement());
4256         }
4257       }
4258       return null;
4259     }
4260   }
4261 
4262   /**
4263    * Contact hdfs and get all information about specified table directory into
4264    * regioninfo list.
4265    */
4266   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4267     private HbckInfo hbi;
4268     private HBaseFsck hbck;
4269     private ErrorReporter errors;
4270 
4271     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4272       this.hbi = hbi;
4273       this.hbck = hbck;
4274       this.errors = errors;
4275     }
4276 
4277     @Override
4278     public synchronized Void call() throws IOException {
4279       // only load entries that haven't been loaded yet.
4280       if (hbi.getHdfsHRI() == null) {
4281         try {
4282           errors.progress();
4283           hbck.loadHdfsRegioninfo(hbi);
4284         } catch (IOException ioe) {
4285           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4286               + hbi.getTableName() + " in hdfs dir "
4287               + hbi.getHdfsRegionDir()
4288               + "!  It may be an invalid format or version file.  Treating as "
4289               + "an orphaned regiondir.";
4290           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4291           try {
4292             hbck.debugLsr(hbi.getHdfsRegionDir());
4293           } catch (IOException ioe2) {
4294             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4295             throw ioe2;
4296           }
4297           hbck.orphanHdfsDirs.add(hbi);
4298           throw ioe;
4299         }
4300       }
4301       return null;
4302     }
4303   };
4304 
4305   /**
4306    * Display the full report from fsck. This displays all live and dead region
4307    * servers, and all known regions.
4308    */
4309   public static void setDisplayFullReport() {
4310     details = true;
4311   }
4312 
4313   /**
4314    * Set summary mode.
4315    * Print only summary of the tables and status (OK or INCONSISTENT)
4316    */
4317   void setSummary() {
4318     summary = true;
4319   }
4320 
4321   /**
4322    * Set hbase:meta check mode.
4323    * Print only info about hbase:meta table deployment/state
4324    */
4325   void setCheckMetaOnly() {
4326     checkMetaOnly = true;
4327   }
4328 
4329   /**
4330    * Set region boundaries check mode.
4331    */
4332   void setRegionBoundariesCheck() {
4333     checkRegionBoundaries = true;
4334   }
4335 
4336   /**
4337    * Set table locks fix mode.
4338    * Delete table locks held for a long time
4339    */
4340   public void setFixTableLocks(boolean shouldFix) {
4341     fixTableLocks = shouldFix;
4342     fixAny |= shouldFix;
4343   }
4344 
4345   /**
4346    * Set orphaned table ZNodes fix mode.
4347    * Set the table state to disable in the orphaned table ZNode.
4348    */
4349   public void setFixTableZNodes(boolean shouldFix) {
4350     fixTableZNodes = shouldFix;
4351     fixAny |= shouldFix;
4352   }
4353 
4354   /**
4355    * Check if we should rerun fsck again. This checks if we've tried to
4356    * fix something and we should rerun fsck tool again.
4357    * Display the full report from fsck. This displays all live and dead
4358    * region servers, and all known regions.
4359    */
4360   void setShouldRerun() {
4361     rerun = true;
4362   }
4363 
4364   boolean shouldRerun() {
4365     return rerun;
4366   }
4367 
4368   /**
4369    * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4370    * found by fsck utility.
4371    */
4372   public void setFixAssignments(boolean shouldFix) {
4373     fixAssignments = shouldFix;
4374     fixAny |= shouldFix;
4375   }
4376 
4377   boolean shouldFixAssignments() {
4378     return fixAssignments;
4379   }
4380 
4381   public void setFixMeta(boolean shouldFix) {
4382     fixMeta = shouldFix;
4383     fixAny |= shouldFix;
4384   }
4385 
4386   boolean shouldFixMeta() {
4387     return fixMeta;
4388   }
4389 
4390   public void setFixEmptyMetaCells(boolean shouldFix) {
4391     fixEmptyMetaCells = shouldFix;
4392     fixAny |= shouldFix;
4393   }
4394 
4395   boolean shouldFixEmptyMetaCells() {
4396     return fixEmptyMetaCells;
4397   }
4398 
4399   public void setCheckHdfs(boolean checking) {
4400     checkHdfs = checking;
4401   }
4402 
4403   boolean shouldCheckHdfs() {
4404     return checkHdfs;
4405   }
4406 
4407   public void setFixHdfsHoles(boolean shouldFix) {
4408     fixHdfsHoles = shouldFix;
4409     fixAny |= shouldFix;
4410   }
4411 
4412   boolean shouldFixHdfsHoles() {
4413     return fixHdfsHoles;
4414   }
4415 
4416   public void setFixTableOrphans(boolean shouldFix) {
4417     fixTableOrphans = shouldFix;
4418     fixAny |= shouldFix;
4419   }
4420 
4421   boolean shouldFixTableOrphans() {
4422     return fixTableOrphans;
4423   }
4424 
4425   public void setFixHdfsOverlaps(boolean shouldFix) {
4426     fixHdfsOverlaps = shouldFix;
4427     fixAny |= shouldFix;
4428   }
4429 
4430   boolean shouldFixHdfsOverlaps() {
4431     return fixHdfsOverlaps;
4432   }
4433 
4434   public void setFixHdfsOrphans(boolean shouldFix) {
4435     fixHdfsOrphans = shouldFix;
4436     fixAny |= shouldFix;
4437   }
4438 
4439   boolean shouldFixHdfsOrphans() {
4440     return fixHdfsOrphans;
4441   }
4442 
4443   public void setFixVersionFile(boolean shouldFix) {
4444     fixVersionFile = shouldFix;
4445     fixAny |= shouldFix;
4446   }
4447 
4448   public boolean shouldFixVersionFile() {
4449     return fixVersionFile;
4450   }
4451 
4452   public void setSidelineBigOverlaps(boolean sbo) {
4453     this.sidelineBigOverlaps = sbo;
4454   }
4455 
4456   public boolean shouldSidelineBigOverlaps() {
4457     return sidelineBigOverlaps;
4458   }
4459 
4460   public void setFixSplitParents(boolean shouldFix) {
4461     fixSplitParents = shouldFix;
4462     fixAny |= shouldFix;
4463   }
4464 
4465   boolean shouldFixSplitParents() {
4466     return fixSplitParents;
4467   }
4468 
4469   public void setFixReferenceFiles(boolean shouldFix) {
4470     fixReferenceFiles = shouldFix;
4471     fixAny |= shouldFix;
4472   }
4473 
4474   boolean shouldFixReferenceFiles() {
4475     return fixReferenceFiles;
4476   }
4477 
4478   public void setFixHFileLinks(boolean shouldFix) {
4479     fixHFileLinks = shouldFix;
4480     fixAny |= shouldFix;
4481   }
4482 
4483   boolean shouldFixHFileLinks() {
4484     return fixHFileLinks;
4485   }
4486 
4487   public boolean shouldIgnorePreCheckPermission() {
4488     return !fixAny || ignorePreCheckPermission;
4489   }
4490 
4491   public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
4492     this.ignorePreCheckPermission = ignorePreCheckPermission;
4493   }
4494 
4495   /**
4496    * @param mm maximum number of regions to merge into a single region.
4497    */
4498   public void setMaxMerge(int mm) {
4499     this.maxMerge = mm;
4500   }
4501 
4502   public int getMaxMerge() {
4503     return maxMerge;
4504   }
4505 
4506   public void setMaxOverlapsToSideline(int mo) {
4507     this.maxOverlapsToSideline = mo;
4508   }
4509 
4510   public int getMaxOverlapsToSideline() {
4511     return maxOverlapsToSideline;
4512   }
4513 
4514   /**
4515    * Only check/fix tables specified by the list,
4516    * Empty list means all tables are included.
4517    */
4518   boolean isTableIncluded(TableName table) {
4519     return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
4520   }
4521 
4522   public void includeTable(TableName table) {
4523     tablesIncluded.add(table);
4524   }
4525 
4526   Set<TableName> getIncludedTables() {
4527     return new HashSet<TableName>(tablesIncluded);
4528   }
4529 
4530   /**
4531    * We are interested in only those tables that have not changed their state in
4532    * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
4533    * @param seconds - the time in seconds
4534    */
4535   public void setTimeLag(long seconds) {
4536     timelag = seconds * 1000; // convert to milliseconds
4537   }
4538 
4539   /**
4540    *
4541    * @param sidelineDir - HDFS path to sideline data
4542    */
4543   public void setSidelineDir(String sidelineDir) {
4544     this.sidelineDir = new Path(sidelineDir);
4545   }
4546 
4547   protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
4548     return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
4549   }
4550 
4551   public HFileCorruptionChecker getHFilecorruptionChecker() {
4552     return hfcc;
4553   }
4554 
4555   public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
4556     this.hfcc = hfcc;
4557   }
4558 
4559   public void setRetCode(int code) {
4560     this.retcode = code;
4561   }
4562 
4563   public int getRetCode() {
4564     return retcode;
4565   }
4566 
4567   protected HBaseFsck printUsageAndExit() {
4568     StringWriter sw = new StringWriter(2048);
4569     PrintWriter out = new PrintWriter(sw);
4570     out.println("Usage: fsck [opts] {only tables}");
4571     out.println(" where [opts] are:");
4572     out.println("   -help Display help options (this)");
4573     out.println("   -details Display full report of all regions.");
4574     out.println("   -timelag <timeInSeconds>  Process only regions that " +
4575                        " have not experienced any metadata updates in the last " +
4576                        " <timeInSeconds> seconds.");
4577     out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
4578         " before checking if the fix worked if run with -fix");
4579     out.println("   -summary Print only summary of the tables and status.");
4580     out.println("   -metaonly Only check the state of the hbase:meta table.");
4581     out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
4582     out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
4583     out.println("");
4584     out.println("  Metadata Repair options: (expert features, use with caution!)");
4585     out.println("   -fix              Try to fix region assignments.  This is for backwards compatiblity");
4586     out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
4587     out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
4588     out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
4589         + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
4590     out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
4591     out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
4592     out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
4593     out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
4594     out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
4595     out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
4596     out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
4597     out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
4598     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
4599     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
4600     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
4601     out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
4602     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
4603         + " (empty REGIONINFO_QUALIFIER rows)");
4604 
4605     out.println("");
4606     out.println("  Datafile Repair options: (expert features, use with caution!)");
4607     out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
4608     out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");
4609 
4610     out.println("");
4611     out.println("  Metadata Repair shortcuts");
4612     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
4613         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
4614         "-fixReferenceFiles -fixHFileLinks -fixTableLocks -fixOrphanedTableZnodes");
4615 
4616     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
4617 
4618     out.println("");
4619     out.println("  Table lock options");
4620     out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
4621 
4622     out.println("");
4623     out.println("  Table Znode options");
4624     out.println("   -fixOrphanedTableZnodes    Set table state in ZNode to disabled if table does not exists");
4625 
4626     out.flush();
4627     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
4628 
4629     setRetCode(-2);
4630     return this;
4631   }
4632 
4633   /**
4634    * Main program
4635    *
4636    * @param args
4637    * @throws Exception
4638    */
4639   public static void main(String[] args) throws Exception {
4640     // create a fsck object
4641     Configuration conf = HBaseConfiguration.create();
4642     Path hbasedir = FSUtils.getRootDir(conf);
4643     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4644     FSUtils.setFsDefault(conf, new Path(defaultFs));
4645     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4646     System.exit(ret);
4647   }
4648 
4649   /**
4650    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4651    */
4652   static class HBaseFsckTool extends Configured implements Tool {
4653     HBaseFsckTool(Configuration conf) { super(conf); }
4654     @Override
4655     public int run(String[] args) throws Exception {
4656       HBaseFsck hbck = new HBaseFsck(getConf());
4657       hbck.exec(hbck.executor, args);
4658       hbck.close();
4659       return hbck.getRetCode();
4660     }
4661   };
4662 
4663 
4664   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4665     ServiceException, InterruptedException {
4666     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4667 
4668     boolean checkCorruptHFiles = false;
4669     boolean sidelineCorruptHFiles = false;
4670 
4671     // Process command-line args.
4672     for (int i = 0; i < args.length; i++) {
4673       String cmd = args[i];
4674       if (cmd.equals("-help") || cmd.equals("-h")) {
4675         return printUsageAndExit();
4676       } else if (cmd.equals("-details")) {
4677         setDisplayFullReport();
4678       } else if (cmd.equals("-timelag")) {
4679         if (i == args.length - 1) {
4680           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4681           return printUsageAndExit();
4682         }
4683         try {
4684           long timelag = Long.parseLong(args[i+1]);
4685           setTimeLag(timelag);
4686         } catch (NumberFormatException e) {
4687           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4688           return printUsageAndExit();
4689         }
4690         i++;
4691       } else if (cmd.equals("-sleepBeforeRerun")) {
4692         if (i == args.length - 1) {
4693           errors.reportError(ERROR_CODE.WRONG_USAGE,
4694             "HBaseFsck: -sleepBeforeRerun needs a value.");
4695           return printUsageAndExit();
4696         }
4697         try {
4698           sleepBeforeRerun = Long.parseLong(args[i+1]);
4699         } catch (NumberFormatException e) {
4700           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4701           return printUsageAndExit();
4702         }
4703         i++;
4704       } else if (cmd.equals("-sidelineDir")) {
4705         if (i == args.length - 1) {
4706           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4707           return printUsageAndExit();
4708         }
4709         i++;
4710         setSidelineDir(args[i]);
4711       } else if (cmd.equals("-fix")) {
4712         errors.reportError(ERROR_CODE.WRONG_USAGE,
4713           "This option is deprecated, please use  -fixAssignments instead.");
4714         setFixAssignments(true);
4715       } else if (cmd.equals("-fixAssignments")) {
4716         setFixAssignments(true);
4717       } else if (cmd.equals("-fixMeta")) {
4718         setFixMeta(true);
4719       } else if (cmd.equals("-noHdfsChecking")) {
4720         setCheckHdfs(false);
4721       } else if (cmd.equals("-fixHdfsHoles")) {
4722         setFixHdfsHoles(true);
4723       } else if (cmd.equals("-fixHdfsOrphans")) {
4724         setFixHdfsOrphans(true);
4725       } else if (cmd.equals("-fixTableOrphans")) {
4726         setFixTableOrphans(true);
4727       } else if (cmd.equals("-fixHdfsOverlaps")) {
4728         setFixHdfsOverlaps(true);
4729       } else if (cmd.equals("-fixVersionFile")) {
4730         setFixVersionFile(true);
4731       } else if (cmd.equals("-sidelineBigOverlaps")) {
4732         setSidelineBigOverlaps(true);
4733       } else if (cmd.equals("-fixSplitParents")) {
4734         setFixSplitParents(true);
4735       } else if (cmd.equals("-ignorePreCheckPermission")) {
4736         setIgnorePreCheckPermission(true);
4737       } else if (cmd.equals("-checkCorruptHFiles")) {
4738         checkCorruptHFiles = true;
4739       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4740         sidelineCorruptHFiles = true;
4741       } else if (cmd.equals("-fixReferenceFiles")) {
4742         setFixReferenceFiles(true);
4743       } else if (cmd.equals("-fixHFileLinks")) {
4744         setFixHFileLinks(true);
4745       } else if (cmd.equals("-fixEmptyMetaCells")) {
4746         setFixEmptyMetaCells(true);
4747       } else if (cmd.equals("-repair")) {
4748         // this attempts to merge overlapping hdfs regions, needs testing
4749         // under load
4750         setFixHdfsHoles(true);
4751         setFixHdfsOrphans(true);
4752         setFixMeta(true);
4753         setFixAssignments(true);
4754         setFixHdfsOverlaps(true);
4755         setFixVersionFile(true);
4756         setSidelineBigOverlaps(true);
4757         setFixSplitParents(false);
4758         setCheckHdfs(true);
4759         setFixReferenceFiles(true);
4760         setFixHFileLinks(true);
4761         setFixTableLocks(true);
4762         setFixTableZNodes(true);
4763       } else if (cmd.equals("-repairHoles")) {
4764         // this will make all missing hdfs regions available but may lose data
4765         setFixHdfsHoles(true);
4766         setFixHdfsOrphans(false);
4767         setFixMeta(true);
4768         setFixAssignments(true);
4769         setFixHdfsOverlaps(false);
4770         setSidelineBigOverlaps(false);
4771         setFixSplitParents(false);
4772         setCheckHdfs(true);
4773       } else if (cmd.equals("-maxOverlapsToSideline")) {
4774         if (i == args.length - 1) {
4775           errors.reportError(ERROR_CODE.WRONG_USAGE,
4776             "-maxOverlapsToSideline needs a numeric value argument.");
4777           return printUsageAndExit();
4778         }
4779         try {
4780           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
4781           setMaxOverlapsToSideline(maxOverlapsToSideline);
4782         } catch (NumberFormatException e) {
4783           errors.reportError(ERROR_CODE.WRONG_USAGE,
4784             "-maxOverlapsToSideline needs a numeric value argument.");
4785           return printUsageAndExit();
4786         }
4787         i++;
4788       } else if (cmd.equals("-maxMerge")) {
4789         if (i == args.length - 1) {
4790           errors.reportError(ERROR_CODE.WRONG_USAGE,
4791             "-maxMerge needs a numeric value argument.");
4792           return printUsageAndExit();
4793         }
4794         try {
4795           int maxMerge = Integer.parseInt(args[i+1]);
4796           setMaxMerge(maxMerge);
4797         } catch (NumberFormatException e) {
4798           errors.reportError(ERROR_CODE.WRONG_USAGE,
4799             "-maxMerge needs a numeric value argument.");
4800           return printUsageAndExit();
4801         }
4802         i++;
4803       } else if (cmd.equals("-summary")) {
4804         setSummary();
4805       } else if (cmd.equals("-metaonly")) {
4806         setCheckMetaOnly();
4807       } else if (cmd.equals("-boundaries")) {
4808         setRegionBoundariesCheck();
4809       } else if (cmd.equals("-fixTableLocks")) {
4810         setFixTableLocks(true);
4811       } else if (cmd.equals("-fixOrphanedTableZnodes")) {
4812         setFixTableZNodes(true);
4813       } else if (cmd.startsWith("-")) {
4814         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
4815         return printUsageAndExit();
4816       } else {
4817         includeTable(TableName.valueOf(cmd));
4818         errors.print("Allow checking/fixes for table: " + cmd);
4819       }
4820     }
4821 
4822     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
4823 
4824     // pre-check current user has FS write permission or not
4825     try {
4826       preCheckPermission();
4827     } catch (AccessDeniedException ace) {
4828       Runtime.getRuntime().exit(-1);
4829     } catch (IOException ioe) {
4830       Runtime.getRuntime().exit(-1);
4831     }
4832 
4833     // do the real work of hbck
4834     connect();
4835 
4836     try {
4837       // if corrupt file mode is on, first fix them since they may be opened later
4838       if (checkCorruptHFiles || sidelineCorruptHFiles) {
4839         LOG.info("Checking all hfiles for corruption");
4840         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
4841         setHFileCorruptionChecker(hfcc); // so we can get result
4842         Collection<TableName> tables = getIncludedTables();
4843         Collection<Path> tableDirs = new ArrayList<Path>();
4844         Path rootdir = FSUtils.getRootDir(getConf());
4845         if (tables.size() > 0) {
4846           for (TableName t : tables) {
4847             tableDirs.add(FSUtils.getTableDir(rootdir, t));
4848           }
4849         } else {
4850           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
4851         }
4852         hfcc.checkTables(tableDirs);
4853         hfcc.report(errors);
4854       }
4855 
4856       // check and fix table integrity, region consistency.
4857       int code = onlineHbck();
4858       setRetCode(code);
4859       // If we have changed the HBase state it is better to run hbck again
4860       // to see if we haven't broken something else in the process.
4861       // We run it only once more because otherwise we can easily fall into
4862       // an infinite loop.
4863       if (shouldRerun()) {
4864         try {
4865           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
4866           Thread.sleep(sleepBeforeRerun);
4867         } catch (InterruptedException ie) {
4868           LOG.warn("Interrupted while sleeping");
4869           return this;
4870         }
4871         // Just report
4872         setFixAssignments(false);
4873         setFixMeta(false);
4874         setFixHdfsHoles(false);
4875         setFixHdfsOverlaps(false);
4876         setFixVersionFile(false);
4877         setFixTableOrphans(false);
4878         errors.resetErrors();
4879         code = onlineHbck();
4880         setRetCode(code);
4881       }
4882     } finally {
4883       IOUtils.cleanup(null, this);
4884     }
4885     return this;
4886   }
4887 
4888   /**
4889    * ls -r for debugging purposes
4890    */
4891   void debugLsr(Path p) throws IOException {
4892     debugLsr(getConf(), p, errors);
4893   }
4894 
4895   /**
4896    * ls -r for debugging purposes
4897    */
4898   public static void debugLsr(Configuration conf,
4899       Path p) throws IOException {
4900     debugLsr(conf, p, new PrintingErrorReporter());
4901   }
4902 
4903   /**
4904    * ls -r for debugging purposes
4905    */
4906   public static void debugLsr(Configuration conf,
4907       Path p, ErrorReporter errors) throws IOException {
4908     if (!LOG.isDebugEnabled() || p == null) {
4909       return;
4910     }
4911     FileSystem fs = p.getFileSystem(conf);
4912 
4913     if (!fs.exists(p)) {
4914       // nothing
4915       return;
4916     }
4917     errors.print(p.toString());
4918 
4919     if (fs.isFile(p)) {
4920       return;
4921     }
4922 
4923     if (fs.getFileStatus(p).isDirectory()) {
4924       FileStatus[] fss= fs.listStatus(p);
4925       for (FileStatus status : fss) {
4926         debugLsr(conf, status.getPath(), errors);
4927       }
4928     }
4929   }
4930 }