1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master.procedure;
20
21 import java.io.IOException;
22 import java.util.concurrent.CountDownLatch;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.fs.FileSystem;
28 import org.apache.hadoop.fs.Path;
29 import org.apache.hadoop.hbase.HBaseTestingUtility;
30 import org.apache.hadoop.hbase.HRegionInfo;
31 import org.apache.hadoop.hbase.HTableDescriptor;
32 import org.apache.hadoop.hbase.MiniHBaseCluster;
33 import org.apache.hadoop.hbase.TableName;
34 import org.apache.hadoop.hbase.master.HMaster;
35 import org.apache.hadoop.hbase.procedure2.Procedure;
36 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
37 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
38 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
39 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
40 import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
41 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
42 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
43 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
44 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
45 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
46 import org.apache.hadoop.hbase.testclassification.LargeTests;
47 import org.apache.hadoop.hbase.util.Bytes;
48 import org.apache.hadoop.hbase.util.FSUtils;
49 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
50 import org.junit.After;
51 import org.junit.Before;
52 import org.junit.Ignore;
53 import org.junit.Test;
54 import org.junit.experimental.categories.Category;
55 import org.mockito.Mockito;
56
57 import static org.junit.Assert.assertEquals;
58 import static org.junit.Assert.assertTrue;
59 import static org.junit.Assert.fail;
60
61 @Category(LargeTests.class)
62 public class TestMasterFailoverWithProcedures {
63 private static final Log LOG = LogFactory.getLog(TestMasterFailoverWithProcedures.class);
64
65 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
66
67 private static void setupConf(Configuration conf) {
68
69 conf.setInt("hbase.procedure.store.wal.max.retries.before.roll", 1);
70 conf.setInt("hbase.procedure.store.wal.wait.before.roll", 0);
71 conf.setInt("hbase.procedure.store.wal.max.roll.retries", 1);
72 conf.setInt("hbase.procedure.store.wal.sync.failure.roll.max", 1);
73 }
74
75 @Before
76 public void setup() throws Exception {
77 setupConf(UTIL.getConfiguration());
78 UTIL.startMiniCluster(2, 1);
79
80 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
81 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, false);
82 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, false);
83 }
84
85 @After
86 public void tearDown() throws Exception {
87 try {
88 UTIL.shutdownMiniCluster();
89 } catch (Exception e) {
90 LOG.warn("failure shutting down cluster", e);
91 }
92 }
93
94 @Test(timeout=60000)
95 @Ignore
96 public void testWalRecoverLease() throws Exception {
97 final ProcedureStore masterStore = getMasterProcedureExecutor().getStore();
98 assertTrue("expected WALStore for this test", masterStore instanceof WALProcedureStore);
99
100 HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
101
102 final CountDownLatch masterStoreAbort = new CountDownLatch(1);
103 masterStore.registerListener(new ProcedureStore.ProcedureStoreListener() {
104 @Override
105 public void postSync() {}
106
107 @Override
108 public void abortProcess() {
109 LOG.debug("Abort store of Master");
110 masterStoreAbort.countDown();
111 }
112 });
113
114
115
116 HMaster backupMaster3 = Mockito.mock(HMaster.class);
117 Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
118 Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
119 final WALProcedureStore backupStore3 = new WALProcedureStore(firstMaster.getConfiguration(),
120 firstMaster.getMasterFileSystem().getFileSystem(),
121 ((WALProcedureStore)masterStore).getWALDir(),
122 new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
123
124 final CountDownLatch backupStore3Abort = new CountDownLatch(1);
125 backupStore3.registerListener(new ProcedureStore.ProcedureStoreListener() {
126 @Override
127 public void postSync() {}
128
129 @Override
130 public void abortProcess() {
131 LOG.debug("Abort store of backupMaster3");
132 backupStore3Abort.countDown();
133 backupStore3.stop(true);
134 }
135 });
136 backupStore3.start(1);
137 backupStore3.recoverLease();
138
139
140 HTableDescriptor htd = MasterProcedureTestingUtility.createHTD(TableName.valueOf("mtb"), "f");
141 HRegionInfo[] regions = ModifyRegionUtils.createHRegionInfos(htd, null);
142 LOG.debug("submit proc");
143 try {
144 getMasterProcedureExecutor().submitProcedure(
145 new CreateTableProcedure(getMasterProcedureExecutor().getEnvironment(), htd, regions));
146 fail("expected RuntimeException 'sync aborted'");
147 } catch (RuntimeException e) {
148 LOG.info("got " + e.getMessage());
149 }
150 LOG.debug("wait master store abort");
151 masterStoreAbort.await();
152
153
154 LOG.debug("wait backup master to startup");
155 waitBackupMaster(UTIL, firstMaster);
156 assertEquals(true, firstMaster.isStopped());
157
158
159 LOG.debug("wait the store to abort");
160 backupStore3.getStoreTracker().setDeleted(1, false);
161 try {
162 backupStore3.delete(1);
163 fail("expected RuntimeException 'sync aborted'");
164 } catch (RuntimeException e) {
165 LOG.info("got " + e.getMessage());
166 }
167 backupStore3Abort.await();
168 }
169
170
171
172
173 @Test
174 public void testWALfencingWithoutWALRolling() throws IOException {
175 testWALfencing(false);
176 }
177
178
179
180
181
182 @Test
183 public void testWALfencingWithWALRolling() throws IOException {
184 testWALfencing(true);
185 }
186
187 public void testWALfencing(boolean walRolls) throws IOException {
188 final ProcedureStore procStore = getMasterProcedureExecutor().getStore();
189 assertTrue("expected WALStore for this test", procStore instanceof WALProcedureStore);
190
191 HMaster firstMaster = UTIL.getHBaseCluster().getMaster();
192
193
194 firstMaster.getConfiguration().setLong("hbase.procedure.store.wal.roll.threshold", 1);
195
196 HMaster backupMaster3 = Mockito.mock(HMaster.class);
197 Mockito.doReturn(firstMaster.getConfiguration()).when(backupMaster3).getConfiguration();
198 Mockito.doReturn(true).when(backupMaster3).isActiveMaster();
199 final WALProcedureStore procStore2 = new WALProcedureStore(firstMaster.getConfiguration(),
200 firstMaster.getMasterFileSystem().getFileSystem(),
201 ((WALProcedureStore)procStore).getWALDir(),
202 new MasterProcedureEnv.WALStoreLeaseRecovery(backupMaster3));
203
204
205 LOG.info("Starting new WALProcedureStore");
206 procStore2.start(1);
207 procStore2.recoverLease();
208
209
210
211 if (walRolls) {
212 LOG.info("Inserting into second WALProcedureStore, causing WAL rolls");
213 for (int i = 0; i < 512; i++) {
214
215 Procedure proc2 = new TestProcedure(i);
216 procStore2.insert(proc2, null);
217 procStore2.delete(proc2.getProcId());
218 }
219 }
220
221
222
223
224 LOG.info("Inserting into first WALProcedureStore");
225 try {
226 procStore.insert(new TestProcedure(11), null);
227 fail("Inserting into Procedure Store should have failed");
228 } catch (Exception ex) {
229 LOG.info("Received expected exception", ex);
230 }
231 }
232
233
234
235
236 @Test(timeout=60000)
237 public void testCreateWithFailover() throws Exception {
238
239
240
241
242
243 testCreateWithFailoverAtStep(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS.ordinal());
244 }
245
246 private void testCreateWithFailoverAtStep(final int step) throws Exception {
247 final TableName tableName = TableName.valueOf("testCreateWithFailoverAtStep" + step);
248
249
250 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
251 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
252 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
253
254
255 byte[][] splitKeys = null;
256 HTableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, "f1", "f2");
257 HRegionInfo[] regions = ModifyRegionUtils.createHRegionInfos(htd, splitKeys);
258 long procId = procExec.submitProcedure(
259 new CreateTableProcedure(procExec.getEnvironment(), htd, regions));
260 testRecoveryAndDoubleExecution(UTIL, procId, step, CreateTableState.values());
261
262 MasterProcedureTestingUtility.validateTableCreation(
263 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
264 }
265
266
267
268
269 @Test(timeout=60000)
270 public void testDeleteWithFailover() throws Exception {
271
272
273
274
275
276 testDeleteWithFailoverAtStep(DeleteTableState.DELETE_TABLE_UNASSIGN_REGIONS.ordinal());
277 }
278
279 private void testDeleteWithFailoverAtStep(final int step) throws Exception {
280 final TableName tableName = TableName.valueOf("testDeleteWithFailoverAtStep" + step);
281
282
283 byte[][] splitKeys = null;
284 HRegionInfo[] regions = MasterProcedureTestingUtility.createTable(
285 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
286 Path tableDir = FSUtils.getTableDir(getRootDir(), tableName);
287 MasterProcedureTestingUtility.validateTableCreation(
288 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
289 UTIL.getHBaseAdmin().disableTable(tableName);
290
291 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
292 ProcedureTestingUtility.setKillBeforeStoreUpdate(procExec, true);
293 ProcedureTestingUtility.setToggleKillBeforeStoreUpdate(procExec, true);
294
295
296 long procId = procExec.submitProcedure(
297 new DeleteTableProcedure(procExec.getEnvironment(), tableName));
298 testRecoveryAndDoubleExecution(UTIL, procId, step, DeleteTableState.values());
299
300 MasterProcedureTestingUtility.validateTableDeletion(
301 UTIL.getHBaseCluster().getMaster(), tableName, regions, "f1", "f2");
302 }
303
304
305
306
307 @Test(timeout=90000)
308 public void testTruncateWithFailover() throws Exception {
309
310
311
312
313
314 testTruncateWithFailoverAtStep(true, TruncateTableState.TRUNCATE_TABLE_ADD_TO_META.ordinal());
315 }
316
317 private void testTruncateWithFailoverAtStep(final boolean preserveSplits, final int step)
318 throws Exception {
319 final TableName tableName = TableName.valueOf("testTruncateWithFailoverAtStep" + step);
320
321
322 final String[] families = new String[] { "f1", "f2" };
323 final byte[][] splitKeys = new byte[][] {
324 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
325 };
326 HRegionInfo[] regions = MasterProcedureTestingUtility.createTable(
327 getMasterProcedureExecutor(), tableName, splitKeys, families);
328
329 MasterProcedureTestingUtility.loadData(
330 UTIL.getConnection(), tableName, 100, splitKeys, families);
331 assertEquals(100, UTIL.countRows(tableName));
332
333 UTIL.getHBaseAdmin().disableTable(tableName);
334
335 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
336 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
337
338
339 long procId = procExec.submitProcedure(
340 new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
341 testRecoveryAndDoubleExecution(UTIL, procId, step, TruncateTableState.values());
342
343 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
344 UTIL.waitUntilAllRegionsAssigned(tableName);
345
346
347 regions = UTIL.getHBaseAdmin().getTableRegions(tableName).toArray(new HRegionInfo[0]);
348 if (preserveSplits) {
349 assertEquals(1 + splitKeys.length, regions.length);
350 } else {
351 assertEquals(1, regions.length);
352 }
353 MasterProcedureTestingUtility.validateTableCreation(
354 UTIL.getHBaseCluster().getMaster(), tableName, regions, families);
355
356
357 assertEquals(0, UTIL.countRows(tableName));
358
359
360 MasterProcedureTestingUtility.loadData(
361 UTIL.getConnection(), tableName, 50, splitKeys, families);
362 assertEquals(50, UTIL.countRows(tableName));
363 }
364
365
366
367
368 @Test(timeout=60000)
369 public void testDisableTableWithFailover() throws Exception {
370
371
372
373
374
375 testDisableTableWithFailoverAtStep(
376 DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE.ordinal());
377 }
378
379 private void testDisableTableWithFailoverAtStep(final int step) throws Exception {
380 final TableName tableName = TableName.valueOf("testDisableTableWithFailoverAtStep" + step);
381
382
383 final byte[][] splitKeys = new byte[][] {
384 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
385 };
386 MasterProcedureTestingUtility.createTable(
387 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
388
389 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
390 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
391
392
393 long procId = procExec.submitProcedure(
394 new DisableTableProcedure(procExec.getEnvironment(), tableName, false));
395 testRecoveryAndDoubleExecution(UTIL, procId, step, DisableTableState.values());
396
397 MasterProcedureTestingUtility.validateTableIsDisabled(
398 UTIL.getHBaseCluster().getMaster(), tableName);
399 }
400
401
402
403
404 @Test(timeout=60000)
405 public void testEnableTableWithFailover() throws Exception {
406
407
408
409
410
411 testEnableTableWithFailoverAtStep(
412 EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE.ordinal());
413 }
414
415 private void testEnableTableWithFailoverAtStep(final int step) throws Exception {
416 final TableName tableName = TableName.valueOf("testEnableTableWithFailoverAtStep" + step);
417
418
419 final byte[][] splitKeys = new byte[][] {
420 Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
421 };
422 MasterProcedureTestingUtility.createTable(
423 getMasterProcedureExecutor(), tableName, splitKeys, "f1", "f2");
424 UTIL.getHBaseAdmin().disableTable(tableName);
425
426 ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
427 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
428
429
430 long procId = procExec.submitProcedure(
431 new EnableTableProcedure(procExec.getEnvironment(), tableName, false));
432 testRecoveryAndDoubleExecution(UTIL, procId, step, EnableTableState.values());
433
434 MasterProcedureTestingUtility.validateTableIsEnabled(
435 UTIL.getHBaseCluster().getMaster(), tableName);
436 }
437
438
439
440
441 public static <TState> void testRecoveryAndDoubleExecution(final HBaseTestingUtility testUtil,
442 final long procId, final int lastStepBeforeFailover, TState[] states) throws Exception {
443 ProcedureExecutor<MasterProcedureEnv> procExec =
444 testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
445 ProcedureTestingUtility.waitProcedure(procExec, procId);
446
447 for (int i = 0; i < lastStepBeforeFailover; ++i) {
448 LOG.info("Restart "+ i +" exec state: " + states[i]);
449 ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
450 ProcedureTestingUtility.restart(procExec);
451 ProcedureTestingUtility.waitProcedure(procExec, procId);
452 }
453 ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId);
454
455 LOG.info("Trigger master failover");
456 masterFailover(testUtil);
457
458 procExec = testUtil.getHBaseCluster().getMaster().getMasterProcedureExecutor();
459 ProcedureTestingUtility.waitProcedure(procExec, procId);
460 ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
461 }
462
463
464
465
466 public static void masterFailover(final HBaseTestingUtility testUtil)
467 throws Exception {
468 MiniHBaseCluster cluster = testUtil.getMiniHBaseCluster();
469
470
471 HMaster oldMaster = cluster.getMaster();
472 cluster.killMaster(cluster.getMaster().getServerName());
473
474
475 waitBackupMaster(testUtil, oldMaster);
476 }
477
478 public static void waitBackupMaster(final HBaseTestingUtility testUtil,
479 final HMaster oldMaster) throws Exception {
480 MiniHBaseCluster cluster = testUtil.getMiniHBaseCluster();
481
482 HMaster newMaster = cluster.getMaster();
483 while (newMaster == null || newMaster == oldMaster) {
484 Thread.sleep(250);
485 newMaster = cluster.getMaster();
486 }
487
488 while (!(newMaster.isActiveMaster() && newMaster.isInitialized())) {
489 Thread.sleep(250);
490 }
491 }
492
493
494
495
496 private MasterProcedureEnv getMasterProcedureEnv() {
497 return getMasterProcedureExecutor().getEnvironment();
498 }
499
500 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
501 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
502 }
503
504 private FileSystem getFileSystem() {
505 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
506 }
507
508 private Path getRootDir() {
509 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
510 }
511
512 private Path getTempDir() {
513 return UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getTempDir();
514 }
515 }