View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InterruptedIOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FSDataOutputStream;
36  import org.apache.hadoop.fs.FileStatus;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.FileUtil;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.fs.permission.FsPermission;
41  import org.apache.hadoop.hbase.HColumnDescriptor;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.KeyValue;
46  import org.apache.hadoop.hbase.KeyValueUtil;
47  import org.apache.hadoop.hbase.backup.HFileArchiver;
48  import org.apache.hadoop.hbase.classification.InterfaceAudience;
49  import org.apache.hadoop.hbase.fs.HFileSystem;
50  import org.apache.hadoop.hbase.io.Reference;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.FSHDFSUtils;
53  import org.apache.hadoop.hbase.util.FSUtils;
54  import org.apache.hadoop.hbase.util.Pair;
55  import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
56  
57  /**
58   * View to an on-disk Region.
59   * Provides the set of methods necessary to interact with the on-disk region data.
60   */
61  @InterfaceAudience.Private
62  public class HRegionFileSystem {
63    public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
64  
65    /** Name of the region info file that resides just under the region directory. */
66    public final static String REGION_INFO_FILE = ".regioninfo";
67  
68    /** Temporary subdirectory of the region directory used for merges. */
69    public static final String REGION_MERGES_DIR = ".merges";
70  
71    /** Temporary subdirectory of the region directory used for splits. */
72    public static final String REGION_SPLITS_DIR = ".splits";
73  
74    /** Temporary subdirectory of the region directory used for compaction output. */
75    private static final String REGION_TEMP_DIR = ".tmp";
76  
77    private final HRegionInfo regionInfo;
78    //regionInfo for interacting with FS (getting encodedName, etc)
79    private final HRegionInfo regionInfoForFs;
80    private final Configuration conf;
81    private final Path tableDir;
82    private final FileSystem fs;
83  
84    /**
85     * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
86     * client level.
87     */
88    private final int hdfsClientRetriesNumber;
89    private final int baseSleepBeforeRetries;
90    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
91    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
92  
93    /**
94     * Create a view to the on-disk region
95     * @param conf the {@link Configuration} to use
96     * @param fs {@link FileSystem} that contains the region
97     * @param tableDir {@link Path} to where the table is being stored
98     * @param regionInfo {@link HRegionInfo} for region
99     */
100   HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
101       final HRegionInfo regionInfo) {
102     this.fs = fs;
103     this.conf = conf;
104     this.tableDir = tableDir;
105     this.regionInfo = regionInfo;
106     this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
107     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
108       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
109     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
110       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
111  }
112 
113   /** @return the underlying {@link FileSystem} */
114   public FileSystem getFileSystem() {
115     return this.fs;
116   }
117 
118   /** @return the {@link HRegionInfo} that describe this on-disk region view */
119   public HRegionInfo getRegionInfo() {
120     return this.regionInfo;
121   }
122 
123   public HRegionInfo getRegionInfoForFS() {
124     return this.regionInfoForFs;
125   }
126 
127   /** @return {@link Path} to the region's root directory. */
128   public Path getTableDir() {
129     return this.tableDir;
130   }
131 
132   /** @return {@link Path} to the region directory. */
133   public Path getRegionDir() {
134     return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
135   }
136 
137   // ===========================================================================
138   //  Temp Helpers
139   // ===========================================================================
140   /** @return {@link Path} to the region's temp directory, used for file creations */
141   Path getTempDir() {
142     return new Path(getRegionDir(), REGION_TEMP_DIR);
143   }
144 
145   /**
146    * Clean up any temp detritus that may have been left around from previous operation attempts.
147    */
148   void cleanupTempDir() throws IOException {
149     deleteDir(getTempDir());
150   }
151 
152   // ===========================================================================
153   //  Store/StoreFile Helpers
154   // ===========================================================================
155   /**
156    * Returns the directory path of the specified family
157    * @param familyName Column Family Name
158    * @return {@link Path} to the directory of the specified family
159    */
160   public Path getStoreDir(final String familyName) {
161     return new Path(this.getRegionDir(), familyName);
162   }
163 
164   /**
165    * Create the store directory for the specified family name
166    * @param familyName Column Family Name
167    * @return {@link Path} to the directory of the specified family
168    * @throws IOException if the directory creation fails.
169    */
170   Path createStoreDir(final String familyName) throws IOException {
171     Path storeDir = getStoreDir(familyName);
172     if(!fs.exists(storeDir) && !createDir(storeDir))
173       throw new IOException("Failed creating "+storeDir);
174     return storeDir;
175   }
176 
177   /**
178    * Returns the store files available for the family.
179    * This methods performs the filtering based on the valid store files.
180    * @param familyName Column Family Name
181    * @return a set of {@link StoreFileInfo} for the specified family.
182    */
183   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
184     return getStoreFiles(Bytes.toString(familyName));
185   }
186 
187   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
188     return getStoreFiles(familyName, true);
189   }
190 
191   /**
192    * Returns the store files available for the family.
193    * This methods performs the filtering based on the valid store files.
194    * @param familyName Column Family Name
195    * @return a set of {@link StoreFileInfo} for the specified family.
196    */
197   public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
198       throws IOException {
199     Path familyDir = getStoreDir(familyName);
200     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
201     if (files == null) {
202       LOG.debug("No StoreFiles for: " + familyDir);
203       return null;
204     }
205 
206     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
207     for (FileStatus status: files) {
208       if (validate && !StoreFileInfo.isValid(status)) {
209         LOG.warn("Invalid StoreFile: " + status.getPath());
210         continue;
211       }
212       StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
213         regionInfoForFs, familyName, status.getPath());
214       storeFiles.add(info);
215 
216     }
217     return storeFiles;
218   }
219 
220   /**
221    * Return Qualified Path of the specified family/file
222    *
223    * @param familyName Column Family Name
224    * @param fileName File Name
225    * @return The qualified Path for the specified family/file
226    */
227   Path getStoreFilePath(final String familyName, final String fileName) {
228     Path familyDir = getStoreDir(familyName);
229     return new Path(familyDir, fileName).makeQualified(this.fs);
230   }
231 
232   /**
233    * Return the store file information of the specified family/file.
234    *
235    * @param familyName Column Family Name
236    * @param fileName File Name
237    * @return The {@link StoreFileInfo} for the specified family/file
238    */
239   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
240       throws IOException {
241     Path familyDir = getStoreDir(familyName);
242     return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
243       regionInfoForFs, familyName, new Path(familyDir, fileName));
244   }
245 
246   /**
247    * Returns true if the specified family has reference files
248    * @param familyName Column Family Name
249    * @return true if family contains reference files
250    * @throws IOException
251    */
252   public boolean hasReferences(final String familyName) throws IOException {
253     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
254         new FSUtils.ReferenceFileFilter(fs));
255     return files != null && files.length > 0;
256   }
257 
258   /**
259    * Check whether region has Reference file
260    * @param htd table desciptor of the region
261    * @return true if region has reference file
262    * @throws IOException
263    */
264   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
265     for (HColumnDescriptor family : htd.getFamilies()) {
266       if (hasReferences(family.getNameAsString())) {
267         return true;
268       }
269     }
270     return false;
271   }
272 
273   /**
274    * @return the set of families present on disk
275    * @throws IOException
276    */
277   public Collection<String> getFamilies() throws IOException {
278     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
279     if (fds == null) return null;
280 
281     ArrayList<String> families = new ArrayList<String>(fds.length);
282     for (FileStatus status: fds) {
283       families.add(status.getPath().getName());
284     }
285 
286     return families;
287   }
288 
289   /**
290    * Remove the region family from disk, archiving the store files.
291    * @param familyName Column Family Name
292    * @throws IOException if an error occours during the archiving
293    */
294   public void deleteFamily(final String familyName) throws IOException {
295     // archive family store files
296     HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
297 
298     // delete the family folder
299     Path familyDir = getStoreDir(familyName);
300     if(fs.exists(familyDir) && !deleteDir(familyDir))
301       throw new IOException("Could not delete family " + familyName
302           + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
303           + regionInfoForFs.getEncodedName() + ")");
304   }
305 
306   /**
307    * Generate a unique file name, used by createTempName() and commitStoreFile()
308    * @param suffix extra information to append to the generated name
309    * @return Unique file name
310    */
311   private static String generateUniqueName(final String suffix) {
312     String name = UUID.randomUUID().toString().replaceAll("-", "");
313     if (suffix != null) name += suffix;
314     return name;
315   }
316 
317   /**
318    * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
319    * to get a safer file creation.
320    * <code>
321    * Path file = fs.createTempName();
322    * ...StoreFile.Writer(file)...
323    * fs.commitStoreFile("family", file);
324    * </code>
325    *
326    * @return Unique {@link Path} of the temporary file
327    */
328   public Path createTempName() {
329     return createTempName(null);
330   }
331 
332   /**
333    * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
334    * to get a safer file creation.
335    * <code>
336    * Path file = fs.createTempName();
337    * ...StoreFile.Writer(file)...
338    * fs.commitStoreFile("family", file);
339    * </code>
340    *
341    * @param suffix extra information to append to the generated name
342    * @return Unique {@link Path} of the temporary file
343    */
344   public Path createTempName(final String suffix) {
345     return new Path(getTempDir(), generateUniqueName(suffix));
346   }
347 
348   /**
349    * Move the file from a build/temp location to the main family store directory.
350    * @param familyName Family that will gain the file
351    * @param buildPath {@link Path} to the file to commit.
352    * @return The new {@link Path} of the committed file
353    * @throws IOException
354    */
355   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
356     Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false);
357     return commitStoreFile(buildPath, dstPath);
358   }
359 
360   /**
361    * Generate the filename in the main family store directory for moving the file from a build/temp
362    *  location.
363    * @param familyName Family that will gain the file
364    * @param buildPath {@link Path} to the file to commit.
365    * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
366    * @param generateNewName False if you want to keep the buildPath name
367    * @return The new {@link Path} of the to be committed file
368    * @throws IOException
369    */
370   private Path preCommitStoreFile(final String familyName, final Path buildPath,
371       final long seqNum, final boolean generateNewName) throws IOException {
372     Path storeDir = getStoreDir(familyName);
373     if(!fs.exists(storeDir) && !createDir(storeDir))
374       throw new IOException("Failed creating " + storeDir);
375 
376     String name = buildPath.getName();
377     if (generateNewName) {
378       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
379     }
380     Path dstPath = new Path(storeDir, name);
381     if (!fs.exists(buildPath)) {
382       throw new FileNotFoundException(buildPath.toString());
383     }
384     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
385     return dstPath;
386    }
387 
388   /*
389    * Moves file from staging dir to region dir
390    * @param buildPath {@link Path} to the file to commit.
391    * @param dstPath {@link Path} to the file under region dir
392    * @return The {@link Path} of the committed file
393    * @throws IOException
394    */
395   Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException {
396     // buildPath exists, therefore not doing an exists() check.
397     if (!rename(buildPath, dstPath)) {
398       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
399     }
400     return dstPath;
401   }
402 
403   /**
404    * Moves multiple store files to the relative region's family store directory.
405    * @param storeFiles list of store files divided by family
406    * @throws IOException
407    */
408   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
409     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
410       String familyName = Bytes.toString(es.getKey());
411       for (StoreFile sf: es.getValue()) {
412         commitStoreFile(familyName, sf.getPath());
413       }
414     }
415   }
416 
417   /**
418    * Archives the specified store file from the specified family.
419    * @param familyName Family that contains the store files
420    * @param filePath {@link Path} to the store file to remove
421    * @throws IOException if the archiving fails
422    */
423   public void removeStoreFile(final String familyName, final Path filePath)
424       throws IOException {
425     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
426         this.tableDir, Bytes.toBytes(familyName), filePath);
427   }
428 
429   /**
430    * Closes and archives the specified store files from the specified family.
431    * @param familyName Family that contains the store files
432    * @param storeFiles set of store files to remove
433    * @throws IOException if the archiving fails
434    */
435   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
436       throws IOException {
437     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
438         this.tableDir, Bytes.toBytes(familyName), storeFiles);
439   }
440 
441   /**
442    * Bulk load: Add a specified store file to the specified family.
443    * If the source file is on the same different file-system is moved from the
444    * source location to the destination location, otherwise is copied over.
445    *
446    * @param familyName Family that will gain the file
447    * @param srcPath {@link Path} to the file to import
448    * @param seqNum Bulk Load sequence number
449    * @return The destination {@link Path} of the bulk loaded file
450    * @throws IOException
451    */
452   Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
453       throws IOException {
454     // Copy the file if it's on another filesystem
455     FileSystem srcFs = srcPath.getFileSystem(conf);
456     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
457 
458     // We can't compare FileSystem instances as equals() includes UGI instance
459     // as part of the comparison and won't work when doing SecureBulkLoad
460     // TODO deal with viewFS
461     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
462       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
463           "the destination store. Copying file over to destination filesystem.");
464       Path tmpPath = createTempName();
465       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
466       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
467       srcPath = tmpPath;
468     }
469 
470     return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true));
471   }
472 
473   // ===========================================================================
474   //  Splits Helpers
475   // ===========================================================================
476   /** @return {@link Path} to the temp directory used during split operations */
477   Path getSplitsDir() {
478     return new Path(getRegionDir(), REGION_SPLITS_DIR);
479   }
480 
481   Path getSplitsDir(final HRegionInfo hri) {
482     return new Path(getSplitsDir(), hri.getEncodedName());
483   }
484 
485   /**
486    * Clean up any split detritus that may have been left around from previous split attempts.
487    */
488   void cleanupSplitsDir() throws IOException {
489     deleteDir(getSplitsDir());
490   }
491 
492   /**
493    * Clean up any split detritus that may have been left around from previous
494    * split attempts.
495    * Call this method on initial region deploy.
496    * @throws IOException
497    */
498   void cleanupAnySplitDetritus() throws IOException {
499     Path splitdir = this.getSplitsDir();
500     if (!fs.exists(splitdir)) return;
501     // Look at the splitdir.  It could have the encoded names of the daughter
502     // regions we tried to make.  See if the daughter regions actually got made
503     // out under the tabledir.  If here under splitdir still, then the split did
504     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
505     // where we successfully created daughter a but regionserver crashed during
506     // the creation of region b.  In this case, there'll be an orphan daughter
507     // dir in the filesystem.  TOOD: Fix.
508     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
509     if (daughters != null) {
510       for (FileStatus daughter: daughters) {
511         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
512         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
513           throw new IOException("Failed delete of " + daughterDir);
514         }
515       }
516     }
517     cleanupSplitsDir();
518     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
519   }
520 
521   /**
522    * Remove daughter region
523    * @param regionInfo daughter {@link HRegionInfo}
524    * @throws IOException
525    */
526   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
527     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
528     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
529       throw new IOException("Failed delete of " + regionDir);
530     }
531   }
532 
533   /**
534    * Commit a daughter region, moving it from the split temporary directory
535    * to the proper location in the filesystem.
536    *
537    * @param regionInfo                 daughter {@link org.apache.hadoop.hbase.HRegionInfo}
538    * @throws IOException
539    */
540   Path commitDaughterRegion(final HRegionInfo regionInfo)
541       throws IOException {
542     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
543     Path daughterTmpDir = this.getSplitsDir(regionInfo);
544 
545     if (fs.exists(daughterTmpDir)) {
546 
547       // Write HRI to a file in case we need to recover hbase:meta
548       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
549       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
550       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
551 
552       // Move the daughter temp dir to the table dir
553       if (!rename(daughterTmpDir, regionDir)) {
554         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
555       }
556     }
557 
558     return regionDir;
559   }
560 
561   /**
562    * Create the region splits directory.
563    */
564   void createSplitsDir() throws IOException {
565     Path splitdir = getSplitsDir();
566     if (fs.exists(splitdir)) {
567       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
568       if (!deleteDir(splitdir)) {
569         throw new IOException("Failed deletion of " + splitdir
570             + " before creating them again.");
571       }
572     }
573     // splitDir doesn't exists now. No need to do an exists() call for it.
574     if (!createDir(splitdir)) {
575       throw new IOException("Failed create of " + splitdir);
576     }
577   }
578 
579   /**
580    * Write out a split reference. Package local so it doesnt leak out of
581    * regionserver.
582    * @param hri {@link HRegionInfo} of the destination
583    * @param familyName Column Family Name
584    * @param f File to split.
585    * @param splitRow Split Row
586    * @param top True if we are referring to the top half of the hfile.
587    * @return Path to created reference.
588    * @param splitPolicy
589    * @throws IOException
590    */
591   Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f,
592       final byte[] splitRow, final boolean top, RegionSplitPolicy splitPolicy) throws IOException {
593 
594     if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
595       // Check whether the split row lies in the range of the store file
596       // If it is outside the range, return directly.
597       try {
598         if (top) {
599           //check if larger than last key.
600           KeyValue splitKey = KeyValueUtil.createFirstOnRow(splitRow);
601           byte[] lastKey = f.createReader().getLastKey();
602           // If lastKey is null means storefile is empty.
603           if (lastKey == null) {
604             return null;
605           }
606           if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
607             splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
608             return null;
609           }
610         } else {
611           //check if smaller than first key
612           KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
613           byte[] firstKey = f.createReader().getFirstKey();
614           // If firstKey is null means storefile is empty.
615           if (firstKey == null) {
616             return null;
617           }
618           if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
619             splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
620             return null;
621           }
622         }
623       } finally {
624         f.closeReader(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
625       }
626     }
627 
628     Path splitDir = new Path(getSplitsDir(hri), familyName);
629     // A reference to the bottom half of the hsf store file.
630     Reference r =
631       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
632     // Add the referred-to regions name as a dot separated suffix.
633     // See REF_NAME_REGEX regex above.  The referred-to regions name is
634     // up in the path of the passed in <code>f</code> -- parentdir is family,
635     // then the directory above is the region name.
636     String parentRegionName = regionInfoForFs.getEncodedName();
637     // Write reference with same file id only with the other region name as
638     // suffix and into the new region location (under same family).
639     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
640     return r.write(fs, p);
641   }
642 
643   // ===========================================================================
644   //  Merge Helpers
645   // ===========================================================================
646   /** @return {@link Path} to the temp directory used during merge operations */
647   Path getMergesDir() {
648     return new Path(getRegionDir(), REGION_MERGES_DIR);
649   }
650 
651   Path getMergesDir(final HRegionInfo hri) {
652     return new Path(getMergesDir(), hri.getEncodedName());
653   }
654 
655   /**
656    * Clean up any merge detritus that may have been left around from previous merge attempts.
657    */
658   void cleanupMergesDir() throws IOException {
659     deleteDir(getMergesDir());
660   }
661 
662   /**
663    * Remove merged region
664    * @param mergedRegion {@link HRegionInfo}
665    * @throws IOException
666    */
667   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
668     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
669     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
670       throw new IOException("Failed delete of " + regionDir);
671     }
672   }
673 
674   static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException {
675     if (FSUtils.isDistributedFileSystem(fs)) {
676       return fs.mkdirs(dir);
677     }
678     if (!conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) {
679       return fs.mkdirs(dir);
680     }
681     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
682     return fs.mkdirs(dir, perms);
683   }
684   /**
685    * Create the region merges directory.
686    * @throws IOException If merges dir already exists or we fail to create it.
687    * @see HRegionFileSystem#cleanupMergesDir()
688    */
689   void createMergesDir() throws IOException {
690     Path mergesdir = getMergesDir();
691     if (fs.exists(mergesdir)) {
692       LOG.info("The " + mergesdir
693           + " directory exists.  Hence deleting it to recreate it");
694       if (!fs.delete(mergesdir, true)) {
695         throw new IOException("Failed deletion of " + mergesdir
696             + " before creating them again.");
697       }
698     }
699     if (!mkdirs(fs, conf, mergesdir))
700       throw new IOException("Failed create of " + mergesdir);
701   }
702 
703   /**
704    * Write out a merge reference under the given merges directory. Package local
705    * so it doesnt leak out of regionserver.
706    * @param mergedRegion {@link HRegionInfo} of the merged region
707    * @param familyName Column Family Name
708    * @param f File to create reference.
709    * @param mergedDir
710    * @return Path to created reference.
711    * @throws IOException
712    */
713   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
714       final StoreFile f, final Path mergedDir)
715       throws IOException {
716     Path referenceDir = new Path(new Path(mergedDir,
717         mergedRegion.getEncodedName()), familyName);
718     // A whole reference to the store file.
719     Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
720     // Add the referred-to regions name as a dot separated suffix.
721     // See REF_NAME_REGEX regex above. The referred-to regions name is
722     // up in the path of the passed in <code>f</code> -- parentdir is family,
723     // then the directory above is the region name.
724     String mergingRegionName = regionInfoForFs.getEncodedName();
725     // Write reference with same file id only with the other region name as
726     // suffix and into the new region location (under same family).
727     Path p = new Path(referenceDir, f.getPath().getName() + "."
728         + mergingRegionName);
729     return r.write(fs, p);
730   }
731 
732   /**
733    * Commit a merged region, moving it from the merges temporary directory to
734    * the proper location in the filesystem.
735    * @param mergedRegionInfo merged region {@link HRegionInfo}
736    * @throws IOException
737    */
738   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
739     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
740     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
741     // Move the tmp dir in the expected location
742     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
743       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
744         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
745             + regionDir);
746       }
747     }
748   }
749 
750   // ===========================================================================
751   //  Create/Open/Delete Helpers
752   // ===========================================================================
753   /**
754    * Log the current state of the region
755    * @param LOG log to output information
756    * @throws IOException if an unexpected exception occurs
757    */
758   void logFileSystemState(final Log LOG) throws IOException {
759     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
760   }
761 
762   /**
763    * @param hri
764    * @return Content of the file we write out to the filesystem under a region
765    * @throws IOException
766    */
767   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
768     return hri.toDelimitedByteArray();
769   }
770 
771   /**
772    * Create a {@link HRegionInfo} from the serialized version on-disk.
773    * @param fs {@link FileSystem} that contains the Region Info file
774    * @param regionDir {@link Path} to the Region Directory that contains the Info file
775    * @return An {@link HRegionInfo} instance gotten from the Region Info file.
776    * @throws IOException if an error occurred during file open/read operation.
777    */
778   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
779       throws IOException {
780     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
781     try {
782       return HRegionInfo.parseFrom(in);
783     } finally {
784       in.close();
785     }
786   }
787 
788   /**
789    * Write the .regioninfo file on-disk.
790    */
791   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
792       final Path regionInfoFile, final byte[] content) throws IOException {
793     // First check to get the permissions
794     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
795     // Write the RegionInfo file content
796     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
797     try {
798       out.write(content);
799     } finally {
800       out.close();
801     }
802   }
803 
804   /**
805    * Write out an info file under the stored region directory. Useful recovering mangled regions.
806    * If the regionInfo already exists on-disk, then we fast exit.
807    */
808   void checkRegionInfoOnFilesystem() throws IOException {
809     // Compose the content of the file so we can compare to length in filesystem. If not same,
810     // rewrite it (it may have been written in the old format using Writables instead of pb). The
811     // pb version is much shorter -- we write now w/o the toString version -- so checking length
812     // only should be sufficient. I don't want to read the file every time to check if it pb
813     // serialized.
814     byte[] content = getRegionInfoFileContent(regionInfoForFs);
815 
816     // Verify if the region directory exists before opening a region. We need to do this since if
817     // the region directory doesn't exist we will re-create the region directory and a new HRI
818     // when HRegion.openHRegion() is called.
819     try {
820       FileStatus status = fs.getFileStatus(getRegionDir());
821     } catch (FileNotFoundException e) {
822       LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
823           " on table " + regionInfo.getTable());
824     }
825 
826     try {
827       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
828       FileStatus status = fs.getFileStatus(regionInfoFile);
829       if (status != null && status.getLen() == content.length) {
830         // Then assume the content good and move on.
831         // NOTE: that the length is not sufficient to define the the content matches.
832         return;
833       }
834 
835       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
836       if (!fs.delete(regionInfoFile, false)) {
837         throw new IOException("Unable to remove existing " + regionInfoFile);
838       }
839     } catch (FileNotFoundException e) {
840       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
841           " on table " + regionInfo.getTable());
842     }
843 
844     // Write HRI to a file in case we need to recover hbase:meta
845     writeRegionInfoOnFilesystem(content, true);
846   }
847 
848   /**
849    * Write out an info file under the region directory. Useful recovering mangled regions.
850    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
851    */
852   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
853     byte[] content = getRegionInfoFileContent(regionInfoForFs);
854     writeRegionInfoOnFilesystem(content, useTempDir);
855   }
856 
857   /**
858    * Write out an info file under the region directory. Useful recovering mangled regions.
859    * @param regionInfoContent serialized version of the {@link HRegionInfo}
860    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
861    */
862   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
863       final boolean useTempDir) throws IOException {
864     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
865     if (useTempDir) {
866       // Create in tmpDir and then move into place in case we crash after
867       // create but before close. If we don't successfully close the file,
868       // subsequent region reopens will fail the below because create is
869       // registered in NN.
870 
871       // And then create the file
872       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
873 
874       // If datanode crashes or if the RS goes down just before the close is called while trying to
875       // close the created regioninfo file in the .tmp directory then on next
876       // creation we will be getting AlreadyCreatedException.
877       // Hence delete and create the file if exists.
878       if (FSUtils.isExists(fs, tmpPath)) {
879         FSUtils.delete(fs, tmpPath, true);
880       }
881 
882       // Write HRI to a file in case we need to recover hbase:meta
883       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
884 
885       // Move the created file to the original path
886       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
887         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
888       }
889     } else {
890       // Write HRI to a file in case we need to recover hbase:meta
891       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
892     }
893   }
894 
895   /**
896    * Create a new Region on file-system.
897    * @param conf the {@link Configuration} to use
898    * @param fs {@link FileSystem} from which to add the region
899    * @param tableDir {@link Path} to where the table is being stored
900    * @param regionInfo {@link HRegionInfo} for region to be added
901    * @throws IOException if the region creation fails due to a FileSystem exception.
902    */
903   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
904       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
905     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
906     Path regionDir = regionFs.getRegionDir();
907 
908     if (fs.exists(regionDir)) {
909       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
910       throw new IOException("The specified region already exists on disk: " + regionDir);
911     }
912 
913     // Create the region directory
914     if (!createDirOnFileSystem(fs, conf, regionDir)) {
915       LOG.warn("Unable to create the region directory: " + regionDir);
916       throw new IOException("Unable to create region directory: " + regionDir);
917     }
918 
919     // Write HRI to a file in case we need to recover hbase:meta
920     // Only primary replicas should write region info
921     if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
922       regionFs.writeRegionInfoOnFilesystem(false);
923     } else {
924       if (LOG.isDebugEnabled())
925         LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
926     }
927     return regionFs;
928   }
929 
930   /**
931    * Open Region from file-system.
932    * @param conf the {@link Configuration} to use
933    * @param fs {@link FileSystem} from which to add the region
934    * @param tableDir {@link Path} to where the table is being stored
935    * @param regionInfo {@link HRegionInfo} for region to be added
936    * @param readOnly True if you don't want to edit the region data
937    * @throws IOException if the region creation fails due to a FileSystem exception.
938    */
939   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
940       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
941       throws IOException {
942     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
943     Path regionDir = regionFs.getRegionDir();
944 
945     if (!fs.exists(regionDir)) {
946       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
947       throw new IOException("The specified region do not exists on disk: " + regionDir);
948     }
949 
950     if (!readOnly) {
951       // Cleanup temporary directories
952       regionFs.cleanupTempDir();
953       regionFs.cleanupSplitsDir();
954       regionFs.cleanupMergesDir();
955 
956       // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
957       // Only create HRI if we are the default replica
958       if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
959         regionFs.checkRegionInfoOnFilesystem();
960       } else {
961         if (LOG.isDebugEnabled()) {
962           LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
963         }
964       }
965     }
966 
967     return regionFs;
968   }
969 
970   /**
971    * Remove the region from the table directory, archiving the region's hfiles.
972    * @param conf the {@link Configuration} to use
973    * @param fs {@link FileSystem} from which to remove the region
974    * @param tableDir {@link Path} to where the table is being stored
975    * @param regionInfo {@link HRegionInfo} for region to be deleted
976    * @throws IOException if the request cannot be completed
977    */
978   public static void deleteRegionFromFileSystem(final Configuration conf,
979       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
980     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
981     Path regionDir = regionFs.getRegionDir();
982 
983     if (!fs.exists(regionDir)) {
984       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
985       return;
986     }
987 
988     if (LOG.isDebugEnabled()) {
989       LOG.debug("DELETING region " + regionDir);
990     }
991 
992     // Archive region
993     Path rootDir = FSUtils.getRootDir(conf);
994     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
995 
996     // Delete empty region dir
997     if (!fs.delete(regionDir, true)) {
998       LOG.warn("Failed delete of " + regionDir);
999     }
1000   }
1001 
1002   /**
1003    * Creates a directory. Assumes the user has already checked for this directory existence.
1004    * @param dir
1005    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1006    *         whether the directory exists or not, and returns true if it exists.
1007    * @throws IOException
1008    */
1009   boolean createDir(Path dir) throws IOException {
1010     int i = 0;
1011     IOException lastIOE = null;
1012     do {
1013       try {
1014         return mkdirs(fs, conf, dir);
1015       } catch (IOException ioe) {
1016         lastIOE = ioe;
1017         if (fs.exists(dir)) return true; // directory is present
1018         try {
1019           sleepBeforeRetry("Create Directory", i+1);
1020         } catch (InterruptedException e) {
1021           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1022         }
1023       }
1024     } while (++i <= hdfsClientRetriesNumber);
1025     throw new IOException("Exception in createDir", lastIOE);
1026   }
1027 
1028   /**
1029    * Renames a directory. Assumes the user has already checked for this directory existence.
1030    * @param srcpath
1031    * @param dstPath
1032    * @return true if rename is successful.
1033    * @throws IOException
1034    */
1035   boolean rename(Path srcpath, Path dstPath) throws IOException {
1036     IOException lastIOE = null;
1037     int i = 0;
1038     do {
1039       try {
1040         return fs.rename(srcpath, dstPath);
1041       } catch (IOException ioe) {
1042         lastIOE = ioe;
1043         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1044         // dir is not there, retry after some time.
1045         try {
1046           sleepBeforeRetry("Rename Directory", i+1);
1047         } catch (InterruptedException e) {
1048           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1049         }
1050       }
1051     } while (++i <= hdfsClientRetriesNumber);
1052 
1053     throw new IOException("Exception in rename", lastIOE);
1054   }
1055 
1056   /**
1057    * Deletes a directory. Assumes the user has already checked for this directory existence.
1058    * @param dir
1059    * @return true if the directory is deleted.
1060    * @throws IOException
1061    */
1062   boolean deleteDir(Path dir) throws IOException {
1063     IOException lastIOE = null;
1064     int i = 0;
1065     do {
1066       try {
1067         return fs.delete(dir, true);
1068       } catch (IOException ioe) {
1069         lastIOE = ioe;
1070         if (!fs.exists(dir)) return true;
1071         // dir is there, retry deleting after some time.
1072         try {
1073           sleepBeforeRetry("Delete Directory", i+1);
1074         } catch (InterruptedException e) {
1075           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1076         }
1077       }
1078     } while (++i <= hdfsClientRetriesNumber);
1079 
1080     throw new IOException("Exception in DeleteDir", lastIOE);
1081   }
1082 
1083   /**
1084    * sleeping logic; handles the interrupt exception.
1085    */
1086   private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1087     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1088   }
1089 
1090   /**
1091    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1092    * checked for this directory existence.
1093    * @param fs
1094    * @param conf
1095    * @param dir
1096    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1097    *         whether the directory exists or not, and returns true if it exists.
1098    * @throws IOException
1099    */
1100   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1101       throws IOException {
1102     int i = 0;
1103     IOException lastIOE = null;
1104     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1105       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1106     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1107       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1108     do {
1109       try {
1110         return fs.mkdirs(dir);
1111       } catch (IOException ioe) {
1112         lastIOE = ioe;
1113         if (fs.exists(dir)) return true; // directory is present
1114         try {
1115           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1116         } catch (InterruptedException e) {
1117           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1118         }
1119       }
1120     } while (++i <= hdfsClientRetriesNumber);
1121 
1122     throw new IOException("Exception in createDir", lastIOE);
1123   }
1124 
1125   /**
1126    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1127    * for this to avoid re-looking for the integer values.
1128    */
1129   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1130       int hdfsClientRetriesNumber) throws InterruptedException {
1131     if (sleepMultiplier > hdfsClientRetriesNumber) {
1132       LOG.debug(msg + ", retries exhausted");
1133       return;
1134     }
1135     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1136     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1137   }
1138 }