1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertTrue;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.concurrent.CountDownLatch;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.Cell;
34 import org.apache.hadoop.hbase.CellScanner;
35 import org.apache.hadoop.hbase.ChoreService;
36 import org.apache.hadoop.hbase.CoordinatedStateManager;
37 import org.apache.hadoop.hbase.HBaseTestingUtility;
38 import org.apache.hadoop.hbase.HConstants;
39 import org.apache.hadoop.hbase.HTableDescriptor;
40 import org.apache.hadoop.hbase.Server;
41 import org.apache.hadoop.hbase.ServerName;
42 import org.apache.hadoop.hbase.TableName;
43 import org.apache.hadoop.hbase.client.ClusterConnection;
44 import org.apache.hadoop.hbase.client.Durability;
45 import org.apache.hadoop.hbase.client.Put;
46 import org.apache.hadoop.hbase.regionserver.wal.DamagedWALException;
47 import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
48 import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
49 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
50 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
51 import org.apache.hadoop.hbase.testclassification.MediumTests;
52 import org.apache.hadoop.hbase.util.Bytes;
53 import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
54 import org.apache.hadoop.hbase.util.Threads;
55 import org.apache.hadoop.hbase.wal.WAL;
56 import org.apache.hadoop.hbase.wal.WALKey;
57 import org.apache.hadoop.hbase.wal.WALProvider.Writer;
58 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
59 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
60 import org.junit.After;
61 import org.junit.Before;
62 import org.junit.Ignore;
63 import org.junit.Rule;
64 import org.junit.Test;
65 import org.junit.experimental.categories.Category;
66 import org.junit.rules.TestName;
67 import org.mockito.Mockito;
68
69
70
71
72
73 @Category({MediumTests.class})
74 public class TestWALLockup {
75 private static final Log LOG = LogFactory.getLog(TestWALLockup.class);
76 @Rule public TestName name = new TestName();
77
78 private static final String COLUMN_FAMILY = "MyCF";
79 private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
80
81 HRegion region = null;
82
83 private static HBaseTestingUtility TEST_UTIL;
84 private static Configuration CONF ;
85 private String dir;
86
87
88 protected TableName tableName;
89
90 @Before
91 public void setup() throws IOException {
92 TEST_UTIL = HBaseTestingUtility.createLocalHTU();
93 CONF = TEST_UTIL.getConfiguration();
94
95 CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
96 dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
97 tableName = TableName.valueOf(name.getMethodName());
98 }
99
100 @After
101 public void tearDown() throws Exception {
102 EnvironmentEdgeManagerTestHelper.reset();
103 LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
104 TEST_UTIL.cleanupTestDir();
105 }
106
107 String getName() {
108 return name.getMethodName();
109 }
110
111
112
113
114
115
116
117
118 @Ignore @Test (timeout=30000)
119 public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
120
121 class DodgyFSLog extends FSHLog {
122
123 volatile boolean throwException = false;
124
125
126 CountDownLatch latch = new CountDownLatch(1);
127
128 public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
129 throws IOException {
130 super(fs, root, logDir, conf);
131 }
132
133 @Override
134 protected void afterCreatingZigZagLatch() {
135
136
137
138
139 if (throwException) {
140 try {
141 LOG.info("LATCHED");
142 this.latch.await();
143 } catch (InterruptedException e) {
144
145 e.printStackTrace();
146 }
147 }
148 }
149
150 @Override
151 protected void beforeWaitOnSafePoint() {
152 if (throwException) {
153 LOG.info("COUNTDOWN");
154
155
156
157 while (this.latch.getCount() <= 0) Threads.sleep(1);
158 this.latch.countDown();
159 }
160 }
161
162 @Override
163 protected Writer createWriterInstance(Path path) throws IOException {
164 final Writer w = super.createWriterInstance(path);
165 return new Writer() {
166 @Override
167 public void close() throws IOException {
168 w.close();
169 }
170
171 @Override
172 public void sync() throws IOException {
173 if (throwException) {
174 throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
175 }
176 w.sync();
177 }
178
179 @Override
180 public void append(Entry entry) throws IOException {
181 if (throwException) {
182 throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
183 }
184 w.append(entry);
185 }
186
187 @Override
188 public long getLength() throws IOException {
189 return w.getLength();
190 }
191 };
192 }
193 }
194
195
196 Server server = Mockito.mock(Server.class);
197 Mockito.when(server.getConfiguration()).thenReturn(CONF);
198 Mockito.when(server.isStopped()).thenReturn(false);
199 Mockito.when(server.isAborted()).thenReturn(false);
200 RegionServerServices services = Mockito.mock(RegionServerServices.class);
201
202
203 FileSystem fs = FileSystem.get(CONF);
204 Path rootDir = new Path(dir + getName());
205 DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
206 Path originalWAL = dodgyWAL.getCurrentFileName();
207
208 LogRoller logRoller = new LogRoller(server, services);
209 logRoller.addWAL(dodgyWAL);
210
211 logRoller.start();
212
213 HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
214 final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
215 byte [] bytes = Bytes.toBytes(getName());
216 try {
217
218
219
220 Put put = new Put(bytes);
221 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
222 WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName());
223 WALEdit edit = new WALEdit();
224 List<Cell> cells = new ArrayList<Cell>();
225 for (CellScanner cs = put.cellScanner(); cs.advance();) {
226 edit.add(cs.current());
227 cells.add(cs.current());
228 }
229
230
231 for (int i = 0; i < 1000; i++) {
232 dodgyWAL.append(htd, region.getRegionInfo(), key, edit, region.getSequenceId(), true,
233 cells);
234 }
235
236 dodgyWAL.throwException = true;
237
238 dodgyWAL.append(htd, region.getRegionInfo(), key, edit, region.getSequenceId(), true, cells);
239 boolean exception = false;
240 try {
241 dodgyWAL.sync();
242 } catch (Exception e) {
243 exception = true;
244 }
245 assertTrue("Did not get sync exception", exception);
246
247
248
249
250 Thread t = new Thread ("flusher") {
251 public void run() {
252 try {
253 region.flush(false);
254 } catch (IOException e) {
255
256 e.printStackTrace();
257 }
258 };
259 };
260 t.setDaemon(true);
261 t.start();
262
263 while (!region.writestate.flushing) Threads.sleep(1);
264
265 assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
266
267 dodgyWAL.throwException = false;
268 region.put(put);
269 } finally {
270
271 Mockito.when(server.isStopped()).thenReturn(true);
272 if (logRoller != null) logRoller.interrupt();
273 if (region != null) region.close();
274 if (dodgyWAL != null) dodgyWAL.close();
275 }
276 }
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292 @Test(timeout = 20000)
293 public void testLockup16960() throws IOException {
294
295 class DodgyFSLog extends FSHLog {
296
297 volatile boolean throwException = false;
298
299 public DodgyFSLog(FileSystem fs, Path root, String logDir,
300 Configuration conf) throws IOException {
301 super(fs, root, logDir, conf);
302 }
303
304 @Override
305 protected Writer createWriterInstance(Path path) throws IOException {
306 final Writer w = super.createWriterInstance(path);
307 return new Writer() {
308 @Override
309 public void close() throws IOException {
310 w.close();
311 }
312
313 @Override
314 public void sync() throws IOException {
315 if (throwException) {
316 throw new IOException(
317 "FAKE! Failed to replace a bad datanode...SYNC");
318 }
319 w.sync();
320 }
321
322 @Override
323 public void append(Entry entry) throws IOException {
324 if (throwException) {
325 throw new IOException(
326 "FAKE! Failed to replace a bad datanode...APPEND");
327 }
328 w.append(entry);
329 }
330
331 @Override
332 public long getLength() throws IOException {
333 return w.getLength();
334 }
335 };
336 }
337
338 @Override
339 public byte[][] rollWriter(boolean force) throws FailedLogCloseException,
340 IOException {
341 if (throwException) {
342 throw new FailedLogCloseException("testLockup16960");
343 }
344 return super.rollWriter(force);
345 }
346 }
347
348
349 Server server = new DummyServer(CONF, ServerName.valueOf(
350 "hostname1.example.org", 1234, 1L).toString());
351 RegionServerServices services = Mockito.mock(RegionServerServices.class);
352
353 CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);
354
355
356
357 FileSystem fs = FileSystem.get(CONF);
358 Path rootDir = new Path(dir + getName());
359 DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);
360
361 Path rootDir2 = new Path(dir + getName() + "2");
362 final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2",
363 CONF);
364
365 dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());
366
367
368 LogRoller logRoller = new LogRoller(server, services);
369 logRoller.addWAL(dodgyWAL1);
370 logRoller.addWAL(dodgyWAL2);
371
372 logRoller.start();
373
374 HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
375 final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
376 byte[] bytes = Bytes.toBytes(getName());
377
378 try {
379 Put put = new Put(bytes);
380 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
381 WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
382 htd.getTableName());
383 WALEdit edit = new WALEdit();
384 List<Cell> cells = new ArrayList<Cell>();
385 for (CellScanner cs = put.cellScanner(); cs.advance();) {
386 edit.add(cs.current());
387 cells.add(cs.current());
388 }
389
390 LOG.info("SET throwing of exception on append");
391 dodgyWAL1.throwException = true;
392
393 dodgyWAL1.append(htd, region.getRegionInfo(), key, edit,
394 region.getSequenceId(), true, cells);
395 boolean exception = false;
396 try {
397 dodgyWAL1.sync();
398 } catch (Exception e) {
399 exception = true;
400 }
401 assertTrue("Did not get sync exception", exception);
402
403
404
405 try {
406
407 Thread.sleep(50);
408 } catch (InterruptedException e) {
409 e.printStackTrace();
410 }
411
412 final CountDownLatch latch = new CountDownLatch(1);
413
414
415
416 key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
417 TableName.valueOf("sleep"));
418 dodgyWAL2.append(htd, region.getRegionInfo(), key, edit,
419 region.getSequenceId(), true, cells);
420
421 Thread t = new Thread("Sync") {
422 public void run() {
423 try {
424 dodgyWAL2.sync();
425 } catch (IOException e) {
426 LOG.info("In sync", e);
427 }
428 latch.countDown();
429 LOG.info("Sync exiting");
430 };
431 };
432 t.setDaemon(true);
433 t.start();
434 try {
435
436 Thread.sleep(100);
437 } catch (InterruptedException e1) {
438 e1.printStackTrace();
439 }
440
441 key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
442 TableName.valueOf("DamagedWALException"));
443 dodgyWAL2.append(htd, region.getRegionInfo(), key, edit,
444 region.getSequenceId(), true, cells);
445
446 while (latch.getCount() > 0) {
447 Threads.sleep(100);
448 }
449 assertTrue(server.isAborted());
450 } finally {
451 if (logRoller != null) {
452 logRoller.interrupt();
453 }
454 try {
455 if (region != null) {
456 region.close();
457 }
458 if (dodgyWAL1 != null) {
459 dodgyWAL1.close();
460 }
461 if (dodgyWAL2 != null) {
462 dodgyWAL2.close();
463 }
464 } catch (Exception e) {
465 LOG.info("On way out", e);
466 }
467 }
468 }
469
470 static class DummyServer implements Server {
471 private Configuration conf;
472 private String serverName;
473 private boolean isAborted = false;
474
475 public DummyServer(Configuration conf, String serverName) {
476 this.conf = conf;
477 this.serverName = serverName;
478 }
479
480 @Override
481 public Configuration getConfiguration() {
482 return conf;
483 }
484
485 @Override
486 public ZooKeeperWatcher getZooKeeper() {
487 return null;
488 }
489
490 @Override
491 public CoordinatedStateManager getCoordinatedStateManager() {
492 return null;
493 }
494
495 @Override
496 public ClusterConnection getConnection() {
497 return null;
498 }
499
500 @Override
501 public MetaTableLocator getMetaTableLocator() {
502 return null;
503 }
504
505 @Override
506 public ServerName getServerName() {
507 return ServerName.valueOf(this.serverName);
508 }
509
510 @Override
511 public void abort(String why, Throwable e) {
512 LOG.info("Aborting " + serverName);
513 this.isAborted = true;
514 }
515
516 @Override
517 public boolean isAborted() {
518 return this.isAborted;
519 }
520
521 @Override
522 public void stop(String why) {
523 this.isAborted = true;
524 }
525
526 @Override
527 public boolean isStopped() {
528 return this.isAborted;
529 }
530
531 @Override
532 public ChoreService getChoreService() {
533 return null;
534 }
535
536 }
537
538 static class DummyWALActionsListener extends WALActionsListener.Base {
539
540 @Override
541 public void visitLogEntryBeforeWrite(HTableDescriptor htd, WALKey logKey,
542 WALEdit logEdit) throws IOException {
543 if (logKey.getTablename().getNameAsString().equalsIgnoreCase("sleep")) {
544 try {
545 Thread.sleep(1000);
546 } catch (InterruptedException e) {
547 e.printStackTrace();
548 }
549 }
550 if (logKey.getTablename().getNameAsString()
551 .equalsIgnoreCase("DamagedWALException")) {
552 throw new DamagedWALException("Failed appending");
553 }
554 }
555
556 }
557
558
559
560
561
562 public HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey, WAL wal)
563 throws IOException {
564 return TEST_UTIL.createLocalHRegion(tableName.getName(), startKey, stopKey,
565 getName(), CONF, false, Durability.SYNC_WAL,
566 wal, COLUMN_FAMILY_BYTES);
567 }
568 }