View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.util.concurrent.atomic.AtomicBoolean;
21  import java.util.concurrent.atomic.AtomicLong;
22  
23  import org.apache.hadoop.hbase.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.classification.InterfaceStability;
25  
26  /**
27   * FastLongHistogram is a thread-safe class that estimate distribution of data and computes the
28   * quantiles.
29   */
30  @InterfaceAudience.Public
31  @InterfaceStability.Evolving
32  public class FastLongHistogram {
33  
34    /**
35     * Default number of bins.
36     */
37    public static final int DEFAULT_NBINS = 255;
38  
39    public static final double[] DEFAULT_QUANTILES =
40        new double[]{0.25, 0.5, 0.75, 0.90, 0.95, 0.98, 0.99, 0.999};
41  
42    /**
43     * Bins is a class containing a list of buckets(or bins) for estimation histogram of some data.
44     */
45    private static class Bins {
46      private final Counter[] counts;
47      // inclusive
48      private final long binsMin;
49      // exclusive
50      private final long binsMax;
51      private final long bins10XMax;
52      private final AtomicLong min = new AtomicLong(Long.MAX_VALUE);
53      private final AtomicLong max = new AtomicLong(0L);
54  
55      private final Counter count = new Counter(0);
56      private final Counter total = new Counter(0);
57  
58      // set to true when any of data has been inserted to the Bins. It is set after the counts are
59      // updated.
60      private final AtomicBoolean hasData = new AtomicBoolean(false);
61  
62      /**
63       * The constructor for creating a Bins without any prior data.
64       */
65      public Bins(int numBins) {
66        counts = createCounters(numBins + 3);
67        this.binsMin = 1L;
68  
69        // These two numbers are total guesses
70        // and should be treated as highly suspect.
71        this.binsMax = 1000;
72        this.bins10XMax = binsMax * 10;
73      }
74  
75      /**
76       * The constructor for creating a Bins with last Bins.
77       */
78      public Bins(Bins last, int numOfBins, double minQ, double maxQ) {
79        long[] values = last.getQuantiles(new double[] { minQ, maxQ });
80        long wd = values[1] - values[0] + 1;
81        // expand minQ and maxQ in two ends back assuming uniform distribution
82        this.binsMin = Math.max(0L, (long) (values[0] - wd * minQ));
83        long binsMax = (long) (values[1] + wd * (1 - maxQ)) + 1;
84        // make sure each of bins is at least of width 1
85        this.binsMax = Math.max(binsMax, this.binsMin + numOfBins);
86        this.bins10XMax = Math.max((long) (values[1] + (binsMax - 1) * 9), this.binsMax + 1);
87  
88        this.counts = createCounters(numOfBins + 3);
89      }
90  
91      private Counter[] createCounters(int num) {
92        Counter[] counters = new Counter[num];
93        for (int i = 0; i < num; i++) {
94          counters[i] = new Counter();
95        }
96        return counters;
97      }
98  
99      private int getIndex(long value) {
100       if (value < this.binsMin) {
101         return 0;
102       } else if (value > this.bins10XMax) {
103         return this.counts.length - 1;
104       } else if (value >= this.binsMax) {
105         return this.counts.length - 2;
106       }
107       // compute the position
108       return 1 + (int) ((value - this.binsMin) * (this.counts.length - 3) /
109           (this.binsMax - this.binsMin));
110 
111     }
112 
113     /**
114      * Adds a value to the histogram.
115      */
116     public void add(long value, long count) {
117       if (value < 0) {
118         // The whole computation is completely thrown off if there are negative numbers
119         //
120         // Normally we would throw an IllegalArgumentException however this is the metrics
121         // system and it should be completely safe at all times.
122         // So silently throw it away.
123         return;
124       }
125       AtomicUtils.updateMin(min, value);
126       AtomicUtils.updateMax(max, value);
127 
128       this.count.add(count);
129       this.total.add(value * count);
130 
131       int pos = getIndex(value);
132       this.counts[pos].add(count);
133 
134       // hasData needs to be updated as last
135       this.hasData.set(true);
136     }
137 
138     /**
139      * Computes the quantiles give the ratios.
140      */
141     public long[] getQuantiles(double[] quantiles) {
142       if (!this.hasData.get()) {
143         // No data yet.
144         return new long[quantiles.length];
145       }
146 
147       // Make a snapshot of lowerCounter, higherCounter and bins.counts to counts.
148       // This is not synchronized, but since the counter are accumulating, the result is a good
149       // estimation of a snapshot.
150       long[] counts = new long[this.counts.length];
151       long total = 0L;
152       for (int i = 0; i < this.counts.length; i++) {
153         counts[i] = this.counts[i].get();
154         total += counts[i];
155       }
156 
157       int rIndex = 0;
158       double qCount = total * quantiles[0];
159       long cum = 0L;
160 
161       long[] res = new long[quantiles.length];
162       countsLoop: for (int i = 0; i < counts.length; i++) {
163         // mn and mx define a value range
164         long mn, mx;
165         if (i == 0) {
166           mn = this.min.get();
167           mx = this.binsMin;
168         } else if (i == counts.length - 1) {
169           mn = this.bins10XMax;
170           mx = this.max.get();
171         } else if (i == counts.length - 2) {
172           mn = this.binsMax;
173           mx = this.bins10XMax;
174         } else {
175           mn = this.binsMin + (i - 1) * (this.binsMax - this.binsMin) / (this.counts.length - 3);
176           mx = this.binsMin + i * (this.binsMax - this.binsMin) / (this.counts.length - 3);
177         }
178 
179         if (mx < this.min.get()) {
180           continue;
181         }
182         if (mn > this.max.get()) {
183           break;
184         }
185         mn = Math.max(mn, this.min.get());
186         mx = Math.min(mx, this.max.get());
187 
188         // lastCum/cum are the corresponding counts to mn/mx
189         double lastCum = cum;
190         cum += counts[i];
191 
192         // fill the results for qCount is within current range.
193         while (qCount <= cum) {
194           if (cum == lastCum) {
195             res[rIndex] = mn;
196           } else {
197             res[rIndex] = (long) ((qCount - lastCum) * (mx - mn) / (cum - lastCum) + mn);
198           }
199 
200           // move to next quantile
201           rIndex++;
202           if (rIndex >= quantiles.length) {
203             break countsLoop;
204           }
205           qCount = total * quantiles[rIndex];
206         }
207       }
208       // In case quantiles contains values >= 100%
209       for (; rIndex < quantiles.length; rIndex++) {
210         res[rIndex] = this.max.get();
211       }
212 
213       return res;
214     }
215 
216 
217     long getNumAtOrBelow(long val) {
218       final int targetIndex = getIndex(val);
219       long totalToCurrentIndex = 0;
220       for (int i = 0; i <= targetIndex; i++) {
221         totalToCurrentIndex += this.counts[i].get();
222       }
223       return  totalToCurrentIndex;
224     }
225   }
226 
227   // The bins counting values. It is replaced with a new one in calling of reset().
228   private volatile Bins bins;
229 
230   /**
231    * Constructor.
232    */
233   public FastLongHistogram() {
234     this(DEFAULT_NBINS);
235   }
236 
237   /**
238    * Constructor.
239    * @param numOfBins the number of bins for the histogram. A larger value results in more precise
240    *          results but with lower efficiency, and vice versus.
241    */
242   public FastLongHistogram(int numOfBins) {
243     this.bins = new Bins(numOfBins);
244   }
245 
246   /**
247    * Constructor setting the bins assuming a uniform distribution within a range.
248    * @param numOfBins the number of bins for the histogram. A larger value results in more precise
249    *          results but with lower efficiency, and vice versus.
250    * @param min lower bound of the region, inclusive.
251    * @param max higher bound of the region, inclusive.
252    */
253   public FastLongHistogram(int numOfBins, long min, long max) {
254     this(numOfBins);
255     Bins bins = new Bins(numOfBins);
256     bins.add(min, 1);
257     bins.add(max, 1);
258     this.bins = new Bins(bins, numOfBins, 0.01, 0.999);
259   }
260 
261   private FastLongHistogram(Bins bins) {
262     this.bins = bins;
263   }
264 
265   /**
266    * Adds a value to the histogram.
267    */
268   public void add(long value, long count) {
269     this.bins.add(value, count);
270   }
271 
272   /**
273    * Computes the quantiles give the ratios.
274    */
275   public long[] getQuantiles(double[] quantiles) {
276     return this.bins.getQuantiles(quantiles);
277   }
278 
279   public long[] getQuantiles() {
280     return this.bins.getQuantiles(DEFAULT_QUANTILES);
281   }
282 
283   public long getMin() {
284     long min = this.bins.min.get();
285     return min == Long.MAX_VALUE ? 0 : min; // in case it is not initialized
286   }
287 
288   public long getMax() {
289     return this.bins.max.get();
290   }
291 
292   public long getCount() {
293     return this.bins.count.get();
294   }
295 
296   public long getMean() {
297     Bins bins = this.bins;
298     long count = bins.count.get();
299     long total = bins.total.get();
300     if (count == 0) {
301       return 0;
302     }
303     return total / count;
304   }
305 
306   public long getNumAtOrBelow(long value) {
307     return this.bins.getNumAtOrBelow(value);
308   }
309 
310   /**
311    * Resets the histogram for new counting.
312    */
313   public FastLongHistogram reset() {
314     Bins oldBins = this.bins;
315     this.bins = new Bins(this.bins, this.bins.counts.length - 3, 0.01, 0.99);
316     return new FastLongHistogram(oldBins);
317   }
318 }