View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver.wal;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicLong;
23  
24  import org.apache.hadoop.hbase.client.Table;
25  import org.junit.Assert;
26  import static org.junit.Assert.assertTrue;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.HColumnDescriptor;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.HTableDescriptor;
38  import org.apache.hadoop.hbase.KeyValue;
39  import org.apache.hadoop.hbase.testclassification.MediumTests;
40  import org.apache.hadoop.hbase.MiniHBaseCluster;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.client.Admin;
43  import org.apache.hadoop.hbase.client.HTable;
44  import org.apache.hadoop.hbase.client.Put;
45  import org.apache.hadoop.hbase.regionserver.HRegionServer;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.hbase.util.FSUtils;
48  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
49  import org.apache.hadoop.hbase.wal.WAL;
50  import org.apache.hadoop.hbase.wal.WALFactory;
51  import org.apache.hadoop.hbase.wal.WALKey;
52  import org.apache.hadoop.hbase.wal.WALSplitter;
53  import org.apache.hadoop.hdfs.MiniDFSCluster;
54  import org.junit.After;
55  import org.junit.Before;
56  import org.junit.BeforeClass;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  
60  /**
61   * Tests for conditions that should trigger RegionServer aborts when
62   * rolling the current WAL fails.
63   */
64  @Category(MediumTests.class)
65  public class TestLogRollAbort {
66    private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
67    private static MiniDFSCluster dfsCluster;
68    private static Admin admin;
69    private static MiniHBaseCluster cluster;
70    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
71  
72    /* For the split-then-roll test */
73    private static final Path HBASEDIR = new Path("/hbase");
74    private static final Path HBASELOGDIR = new Path("/hbaselog");
75    private static final Path OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME);
76  
77    // Need to override this setup so we can edit the config before it gets sent
78    // to the HDFS & HBase cluster startup.
79    @BeforeClass
80    public static void setUpBeforeClass() throws Exception {
81      // Tweak default timeout values down for faster recovery
82      TEST_UTIL.getConfiguration().setInt(
83          "hbase.regionserver.logroll.errors.tolerated", 2);
84      TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);
85  
86      // Increase the amount of time between client retries
87      TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 5 * 1000);
88  
89      // make sure log.hflush() calls syncFs() to open a pipeline
90      TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
91      // lower the namenode & datanode heartbeat so the namenode
92      // quickly detects datanode failures
93      TEST_UTIL.getConfiguration().setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
94      TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
95      // the namenode might still try to choose the recently-dead datanode
96      // for a pipeline, so try to a new pipeline multiple times
97      TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 10);
98    }
99  
100   private Configuration conf;
101   private FileSystem fs;
102 
103   @Before
104   public void setUp() throws Exception {
105     TEST_UTIL.startMiniCluster(2);
106 
107     cluster = TEST_UTIL.getHBaseCluster();
108     dfsCluster = TEST_UTIL.getDFSCluster();
109     admin = TEST_UTIL.getHBaseAdmin();
110     conf = TEST_UTIL.getConfiguration();
111     fs = TEST_UTIL.getDFSCluster().getFileSystem();
112 
113     // disable region rebalancing (interferes with log watching)
114     cluster.getMaster().balanceSwitch(false);
115     FSUtils.setRootDir(conf, HBASEDIR);
116     FSUtils.setWALRootDir(conf, HBASELOGDIR);
117   }
118 
119   @After
120   public void tearDown() throws Exception {
121     TEST_UTIL.shutdownMiniCluster();
122   }
123 
124   /**
125    * Tests that RegionServer aborts if we hit an error closing the WAL when
126    * there are unsynced WAL edits.  See HBASE-4282.
127    */
128   @Test
129   public void testRSAbortWithUnflushedEdits() throws Exception {
130     LOG.info("Starting testRSAbortWithUnflushedEdits()");
131 
132     // When the hbase:meta table can be opened, the region servers are running
133     new HTable(TEST_UTIL.getConfiguration(),
134       TableName.META_TABLE_NAME).close();
135 
136     // Create the test table and open it
137     TableName tableName = TableName.valueOf(this.getClass().getSimpleName());
138     HTableDescriptor desc = new HTableDescriptor(tableName);
139     desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
140 
141     admin.createTable(desc);
142     Table table = new HTable(TEST_UTIL.getConfiguration(), desc.getTableName());
143     try {
144 
145       HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(tableName);
146       WAL log = server.getWAL(null);
147 
148       // don't run this test without append support (HDFS-200 & HDFS-142)
149       assertTrue("Need append support for this test",
150         FSUtils.isAppendSupported(TEST_UTIL.getConfiguration()));
151 
152       Put p = new Put(Bytes.toBytes("row2001"));
153       p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
154       table.put(p);
155 
156       log.sync();
157 
158       p = new Put(Bytes.toBytes("row2002"));
159       p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
160       table.put(p);
161 
162       dfsCluster.restartDataNodes();
163       LOG.info("Restarted datanodes");
164 
165       try {
166         log.rollWriter(true);
167       } catch (FailedLogCloseException flce) {
168         // Expected exception.  We used to expect that there would be unsynced appends but this
169         // not reliable now that sync plays a roll in wall rolling.  The above puts also now call
170         // sync.
171       } catch (Throwable t) {
172         LOG.fatal("FAILED TEST: Got wrong exception", t);
173       }
174     } finally {
175       table.close();
176     }
177   }
178 
179   /**
180    * Tests the case where a RegionServer enters a GC pause,
181    * comes back online after the master declared it dead and started to split.
182    * Want log rolling after a master split to fail. See HBASE-2312.
183    */
184   @Test (timeout=300000)
185   public void testLogRollAfterSplitStart() throws IOException {
186     LOG.info("Verify wal roll after split starts will fail.");
187     String logName = "testLogRollAfterSplitStart";
188     Path thisTestsDir = new Path(HBASELOGDIR, DefaultWALProvider.getWALDirectoryName(logName));
189     final WALFactory wals = new WALFactory(conf, null, logName);
190 
191     try {
192       // put some entries in an WAL
193       TableName tableName =
194           TableName.valueOf(this.getClass().getName());
195       HRegionInfo regioninfo = new HRegionInfo(tableName,
196           HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
197       final WAL log = wals.getWAL(regioninfo.getEncodedNameAsBytes());
198     
199       final AtomicLong sequenceId = new AtomicLong(1);
200 
201       final int total = 20;
202       for (int i = 0; i < total; i++) {
203         WALEdit kvs = new WALEdit();
204         kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
205         HTableDescriptor htd = new HTableDescriptor(tableName);
206         htd.addFamily(new HColumnDescriptor("column"));
207         log.append(htd, regioninfo, new WALKey(regioninfo.getEncodedNameAsBytes(), tableName,
208             System.currentTimeMillis()), kvs, sequenceId, true, null);
209       }
210       // Send the data to HDFS datanodes and close the HDFS writer
211       log.sync();
212       ((FSHLog) log).replaceWriter(((FSHLog)log).getOldPath(), null, null, null);
213 
214       /* code taken from MasterFileSystem.getLogDirs(), which is called from MasterFileSystem.splitLog()
215        * handles RS shutdowns (as observed by the splitting process)
216        */
217       // rename the directory so a rogue RS doesn't create more WALs
218       Path rsSplitDir = thisTestsDir.suffix(DefaultWALProvider.SPLITTING_EXT);
219       if (!fs.rename(thisTestsDir, rsSplitDir)) {
220         throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
221       }
222       LOG.debug("Renamed region directory: " + rsSplitDir);
223 
224       LOG.debug("Processing the old log files.");
225       WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);
226 
227       LOG.debug("Trying to roll the WAL.");
228       try {
229         log.rollWriter();
230         Assert.fail("rollWriter() did not throw any exception.");
231       } catch (IOException ioe) {
232         if (ioe.getCause() instanceof FileNotFoundException) {
233           LOG.info("Got the expected exception: ", ioe.getCause());
234         } else {
235           Assert.fail("Unexpected exception: " + ioe);
236         }
237       }
238     } finally {
239       wals.close();
240       if (fs.exists(thisTestsDir)) {
241         fs.delete(thisTestsDir, true);
242       }
243     }
244   }
245 }