1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.util;
19
20 import java.lang.management.GarbageCollectorMXBean;
21 import java.lang.management.ManagementFactory;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Set;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.metrics.JvmPauseMonitorSource;
30 import org.apache.hadoop.conf.Configuration;
31
32 import com.google.common.base.Joiner;
33 import com.google.common.base.Preconditions;
34 import com.google.common.base.Stopwatch;
35 import com.google.common.collect.Lists;
36 import com.google.common.collect.Maps;
37 import com.google.common.collect.Sets;
38
39
40
41
42
43
44
45
46
47
48
49
50
51 @InterfaceAudience.Private
52 public class JvmPauseMonitor {
53 private static final Log LOG = LogFactory.getLog(JvmPauseMonitor.class);
54
55
56 private static final long SLEEP_INTERVAL_MS = 500;
57
58
59 private final long warnThresholdMs;
60 public static final String WARN_THRESHOLD_KEY =
61 "jvm.pause.warn-threshold.ms";
62 private static final long WARN_THRESHOLD_DEFAULT = 10000;
63
64
65 private final long infoThresholdMs;
66 public static final String INFO_THRESHOLD_KEY =
67 "jvm.pause.info-threshold.ms";
68 private static final long INFO_THRESHOLD_DEFAULT = 1000;
69
70 private Thread monitorThread;
71 private volatile boolean shouldRun = true;
72 private JvmPauseMonitorSource metricsSource;
73
74 public JvmPauseMonitor(Configuration conf) {
75 this(conf, null);
76 }
77
78 public JvmPauseMonitor(Configuration conf, JvmPauseMonitorSource metricsSource) {
79 this.warnThresholdMs = conf.getLong(WARN_THRESHOLD_KEY, WARN_THRESHOLD_DEFAULT);
80 this.infoThresholdMs = conf.getLong(INFO_THRESHOLD_KEY, INFO_THRESHOLD_DEFAULT);
81 this.metricsSource = metricsSource;
82 }
83
84 public void start() {
85 Preconditions.checkState(monitorThread == null, "Already started");
86 monitorThread = new Thread(new Monitor());
87 monitorThread.setDaemon(true);
88 monitorThread.setName("JvmPauseMonitor");
89 monitorThread.start();
90 }
91
92 public void stop() {
93 shouldRun = false;
94 monitorThread.interrupt();
95 try {
96 monitorThread.join();
97 } catch (InterruptedException e) {
98 Thread.currentThread().interrupt();
99 }
100 }
101
102 private String formatMessage(long extraSleepTime, List<String> gcDiffs) {
103 String ret = "Detected pause in JVM or host machine (eg GC): " + "pause of approximately "
104 + extraSleepTime + "ms\n";
105 if (gcDiffs.isEmpty()) {
106 ret += "No GCs detected";
107 } else {
108 ret += Joiner.on("\n").join(gcDiffs);
109 }
110 return ret;
111 }
112
113 private Map<String, GcTimes> getGcTimes() {
114 Map<String, GcTimes> map = Maps.newHashMap();
115 List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
116 for (GarbageCollectorMXBean gcBean : gcBeans) {
117 map.put(gcBean.getName(), new GcTimes(gcBean));
118 }
119 return map;
120 }
121
122 private static class GcTimes {
123 private GcTimes(GarbageCollectorMXBean gcBean) {
124 gcCount = gcBean.getCollectionCount();
125 gcTimeMillis = gcBean.getCollectionTime();
126 }
127
128 private GcTimes(long count, long time) {
129 this.gcCount = count;
130 this.gcTimeMillis = time;
131 }
132
133 private GcTimes subtract(GcTimes other) {
134 return new GcTimes(this.gcCount - other.gcCount, this.gcTimeMillis - other.gcTimeMillis);
135 }
136
137 @Override
138 public String toString() {
139 return "count=" + gcCount + " time=" + gcTimeMillis + "ms";
140 }
141
142 private long gcCount;
143 private long gcTimeMillis;
144 }
145
146 private class Monitor implements Runnable {
147 @Override
148 public void run() {
149 Stopwatch sw = new Stopwatch();
150 Map<String, GcTimes> gcTimesBeforeSleep = getGcTimes();
151 while (shouldRun) {
152 sw.reset().start();
153 try {
154 Thread.sleep(SLEEP_INTERVAL_MS);
155 } catch (InterruptedException ie) {
156 return;
157 }
158
159 long extraSleepTime = sw.elapsedMillis() - SLEEP_INTERVAL_MS;
160 Map<String, GcTimes> gcTimesAfterSleep = getGcTimes();
161
162 if (extraSleepTime > infoThresholdMs) {
163 Set<String> gcBeanNames = Sets.intersection(gcTimesAfterSleep.keySet(),
164 gcTimesBeforeSleep.keySet());
165 List<String> gcDiffs = Lists.newArrayList();
166 for (String name : gcBeanNames) {
167 GcTimes diff = gcTimesAfterSleep.get(name).subtract(gcTimesBeforeSleep.get(name));
168 if (diff.gcCount != 0) {
169 gcDiffs.add("GC pool '" + name + "' had collection(s): " + diff.toString());
170 }
171 }
172
173 updateMetrics(extraSleepTime, !gcDiffs.isEmpty());
174
175 if (extraSleepTime > warnThresholdMs) {
176 LOG.warn(formatMessage(extraSleepTime, gcDiffs));
177 } else {
178 LOG.info(formatMessage(extraSleepTime, gcDiffs));
179 }
180 }
181 gcTimesBeforeSleep = gcTimesAfterSleep;
182 }
183 }
184 }
185
186 public void updateMetrics(long sleepTime, boolean gcDetected) {
187 if (metricsSource != null) {
188 if (sleepTime > warnThresholdMs) {
189 metricsSource.incWarnThresholdExceeded(1);
190 } else {
191 metricsSource.incInfoThresholdExceeded(1);
192 }
193 if (gcDetected) {
194 metricsSource.updatePauseTimeWithGc(sleepTime);
195 } else {
196 metricsSource.updatePauseTimeWithoutGc(sleepTime);
197 }
198 }
199 }
200
201 public JvmPauseMonitorSource getMetricsSource() {
202 return metricsSource;
203 }
204
205 public void setMetricsSource(JvmPauseMonitorSource metricsSource) {
206 this.metricsSource = metricsSource;
207 }
208
209
210
211
212
213
214
215
216 public static void main(String []args) throws Exception {
217 new JvmPauseMonitor(new Configuration()).start();
218 List<String> list = Lists.newArrayList();
219 int i = 0;
220 while (true) {
221 list.add(String.valueOf(i++));
222 }
223 }
224 }