1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Comparator;
23 import java.util.List;
24 import java.util.PriorityQueue;
25
26 import org.apache.hadoop.hbase.Cell;
27 import org.apache.hadoop.hbase.KeyValueUtil;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.classification.InterfaceStability;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
32 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
33 import org.apache.hadoop.hbase.util.ByteStringer;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.hbase.util.Pair;
36 import org.apache.hadoop.hbase.util.UnsafeAccess;
37
38 import com.google.common.annotations.VisibleForTesting;
39 import com.google.protobuf.InvalidProtocolBufferException;
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 @InterfaceAudience.Public
60 @InterfaceStability.Evolving
61 public class FuzzyRowFilter extends FilterBase {
62 private List<Pair<byte[], byte[]>> fuzzyKeysData;
63 private boolean done = false;
64
65
66
67
68
69
70 private int lastFoundIndex = -1;
71
72
73
74
75 private RowTracker tracker;
76
77 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
78 Pair<byte[], byte[]> p;
79 for (int i = 0; i < fuzzyKeysData.size(); i++) {
80 p = fuzzyKeysData.get(i);
81 if (p.getFirst().length != p.getSecond().length) {
82 Pair<String, String> readable =
83 new Pair<String, String>(Bytes.toStringBinary(p.getFirst()), Bytes.toStringBinary(p
84 .getSecond()));
85 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
86 }
87
88 p.setSecond(preprocessMask(p.getSecond()));
89 preprocessSearchKey(p);
90 }
91 this.fuzzyKeysData = fuzzyKeysData;
92 this.tracker = new RowTracker();
93 }
94
95 private void preprocessSearchKey(Pair<byte[], byte[]> p) {
96 if (UnsafeAccess.unaligned() == false) {
97 return;
98 }
99 byte[] key = p.getFirst();
100 byte[] mask = p.getSecond();
101 for (int i = 0; i < mask.length; i++) {
102
103 if (mask[i] == 2) {
104 key[i] = 0;
105 }
106 }
107 }
108
109
110
111
112
113
114
115 private byte[] preprocessMask(byte[] mask) {
116 if (UnsafeAccess.unaligned() == false) {
117 return mask;
118 }
119 if (isPreprocessedMask(mask)) return mask;
120 for (int i = 0; i < mask.length; i++) {
121 if (mask[i] == 0) {
122 mask[i] = -1;
123 } else if (mask[i] == 1) {
124 mask[i] = 2;
125 }
126 }
127 return mask;
128 }
129
130 private boolean isPreprocessedMask(byte[] mask) {
131 for (int i = 0; i < mask.length; i++) {
132 if (mask[i] != -1 && mask[i] != 2) {
133 return false;
134 }
135 }
136 return true;
137 }
138
139 @Override
140 public ReturnCode filterKeyValue(Cell c) {
141 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
142 final int size = fuzzyKeysData.size();
143 for (int i = startIndex; i < size + startIndex; i++) {
144 final int index = i % size;
145 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
146
147 for (int j = 0; j < fuzzyData.getSecond().length; j++) {
148 fuzzyData.getSecond()[j] >>= 2;
149 }
150 SatisfiesCode satisfiesCode =
151 satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
152 fuzzyData.getFirst(), fuzzyData.getSecond());
153 if (satisfiesCode == SatisfiesCode.YES) {
154 lastFoundIndex = index;
155 return ReturnCode.INCLUDE;
156 }
157 }
158
159 lastFoundIndex = -1;
160
161 return ReturnCode.SEEK_NEXT_USING_HINT;
162
163 }
164
165 @Override
166 public Cell getNextCellHint(Cell currentCell) {
167 boolean result = tracker.updateTracker(currentCell);
168 if (result == false) {
169 done = true;
170 return null;
171 }
172 byte[] nextRowKey = tracker.nextRow();
173 return KeyValueUtil.createFirstOnRow(nextRowKey);
174 }
175
176
177
178
179
180
181
182
183
184 private class RowTracker {
185 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
186 private boolean initialized = false;
187
188 RowTracker() {
189 nextRows =
190 new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
191 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
192 @Override
193 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
194 Pair<byte[], Pair<byte[], byte[]>> o2) {
195 int compare = Bytes.compareTo(o1.getFirst(), o2.getFirst());
196 if (!isReversed()) {
197 return compare;
198 } else {
199 return -compare;
200 }
201 }
202 });
203 }
204
205 byte[] nextRow() {
206 if (nextRows.isEmpty()) {
207 throw new IllegalStateException(
208 "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
209 } else {
210 return nextRows.peek().getFirst();
211 }
212 }
213
214 boolean updateTracker(Cell currentCell) {
215 if (!initialized) {
216 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
217 updateWith(currentCell, fuzzyData);
218 }
219 initialized = true;
220 } else {
221 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
222 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
223 Pair<byte[], byte[]> fuzzyData = head.getSecond();
224 updateWith(currentCell, fuzzyData);
225 }
226 }
227 return !nextRows.isEmpty();
228 }
229
230 boolean lessThan(Cell currentCell, byte[] nextRowKey) {
231 int compareResult =
232 Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
233 currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
234 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
235 }
236
237 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
238 byte[] nextRowKeyCandidate =
239 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
240 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
241 if (nextRowKeyCandidate != null) {
242 nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
243 }
244 }
245
246 }
247
248 @Override
249 public boolean filterAllRemaining() {
250 return done;
251 }
252
253
254
255
256 public byte[] toByteArray() {
257 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
258 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
259 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
260 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
261 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
262 builder.addFuzzyKeysData(bbpBuilder);
263 }
264 return builder.build().toByteArray();
265 }
266
267
268
269
270
271
272
273 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
274 FilterProtos.FuzzyRowFilter proto;
275 try {
276 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
277 } catch (InvalidProtocolBufferException e) {
278 throw new DeserializationException(e);
279 }
280 int count = proto.getFuzzyKeysDataCount();
281 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
282 for (int i = 0; i < count; ++i) {
283 BytesBytesPair current = proto.getFuzzyKeysData(i);
284 byte[] keyBytes = current.getFirst().toByteArray();
285 byte[] keyMeta = current.getSecond().toByteArray();
286 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
287 }
288 return new FuzzyRowFilter(fuzzyKeysData);
289 }
290
291 @Override
292 public String toString() {
293 final StringBuilder sb = new StringBuilder();
294 sb.append("FuzzyRowFilter");
295 sb.append("{fuzzyKeysData=");
296 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
297 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
298 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
299 }
300 sb.append("}, ");
301 return sb.toString();
302 }
303
304
305
306 static enum SatisfiesCode {
307
308 YES,
309
310 NEXT_EXISTS,
311
312 NO_NEXT
313 }
314
315 @VisibleForTesting
316 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
317 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
318 }
319
320 @VisibleForTesting
321 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
322 byte[] fuzzyKeyMeta) {
323 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
324 }
325
326 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
327 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
328
329 if (UnsafeAccess.unaligned() == false) {
330 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
331 }
332
333 if (row == null) {
334
335 return SatisfiesCode.YES;
336 }
337 length = Math.min(length, fuzzyKeyBytes.length);
338 int numWords = length / Bytes.SIZEOF_LONG;
339 int offsetAdj = offset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
340
341 int j = numWords << 3;
342
343 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
344
345 long fuzzyBytes =
346 UnsafeAccess.theUnsafe.getLong(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
347 + (long) i);
348 long fuzzyMeta =
349 UnsafeAccess.theUnsafe.getLong(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
350 + (long) i);
351 long rowValue = UnsafeAccess.theUnsafe.getLong(row, offsetAdj + (long) i);
352 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
353
354 return SatisfiesCode.NEXT_EXISTS;
355 }
356 }
357
358 int off = j;
359
360 if (length - off >= Bytes.SIZEOF_INT) {
361 int fuzzyBytes =
362 UnsafeAccess.theUnsafe.getInt(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
363 + (long) off);
364 int fuzzyMeta =
365 UnsafeAccess.theUnsafe.getInt(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
366 + (long) off);
367 int rowValue = UnsafeAccess.theUnsafe.getInt(row, offsetAdj + (long) off);
368 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
369
370 return SatisfiesCode.NEXT_EXISTS;
371 }
372 off += Bytes.SIZEOF_INT;
373 }
374
375 if (length - off >= Bytes.SIZEOF_SHORT) {
376 short fuzzyBytes =
377 UnsafeAccess.theUnsafe.getShort(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
378 + (long) off);
379 short fuzzyMeta =
380 UnsafeAccess.theUnsafe.getShort(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
381 + (long) off);
382 short rowValue = UnsafeAccess.theUnsafe.getShort(row, offsetAdj + (long) off);
383 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
384
385
386
387 return SatisfiesCode.NEXT_EXISTS;
388 }
389 off += Bytes.SIZEOF_SHORT;
390 }
391
392 if (length - off >= Bytes.SIZEOF_BYTE) {
393 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
394 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
395 int rowValue = row[offset + off] & 0xff;
396 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
397
398 return SatisfiesCode.NEXT_EXISTS;
399 }
400 }
401 return SatisfiesCode.YES;
402 }
403
404 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
405 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
406 if (row == null) {
407
408 return SatisfiesCode.YES;
409 }
410
411 Order order = Order.orderFor(reverse);
412 boolean nextRowKeyCandidateExists = false;
413
414 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
415
416 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
417 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
418 if (fixedByteIncorrect) {
419
420 if (nextRowKeyCandidateExists) {
421 return SatisfiesCode.NEXT_EXISTS;
422 }
423
424
425
426
427 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
428 if (rowByteLessThanFixed && !reverse) {
429 return SatisfiesCode.NEXT_EXISTS;
430 } else if (!rowByteLessThanFixed && reverse) {
431 return SatisfiesCode.NEXT_EXISTS;
432 } else {
433 return SatisfiesCode.NO_NEXT;
434 }
435 }
436
437
438
439
440
441
442
443 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
444 nextRowKeyCandidateExists = true;
445 }
446 }
447 return SatisfiesCode.YES;
448 }
449
450 @VisibleForTesting
451 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
452 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
453 }
454
455 @VisibleForTesting
456 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
457 byte[] fuzzyKeyMeta) {
458 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
459 }
460
461
462 private enum Order {
463 ASC {
464 public boolean lt(int lhs, int rhs) {
465 return lhs < rhs;
466 }
467
468 public boolean gt(int lhs, int rhs) {
469 return lhs > rhs;
470 }
471
472 public byte inc(byte val) {
473
474 return (byte) (val + 1);
475 }
476
477 public boolean isMax(byte val) {
478 return val == (byte) 0xff;
479 }
480
481 public byte min() {
482 return 0;
483 }
484 },
485 DESC {
486 public boolean lt(int lhs, int rhs) {
487 return lhs > rhs;
488 }
489
490 public boolean gt(int lhs, int rhs) {
491 return lhs < rhs;
492 }
493
494 public byte inc(byte val) {
495
496 return (byte) (val - 1);
497 }
498
499 public boolean isMax(byte val) {
500 return val == 0;
501 }
502
503 public byte min() {
504 return (byte) 0xFF;
505 }
506 };
507
508 public static Order orderFor(boolean reverse) {
509 return reverse ? DESC : ASC;
510 }
511
512
513 public abstract boolean lt(int lhs, int rhs);
514
515
516 public abstract boolean gt(int lhs, int rhs);
517
518
519 public abstract byte inc(byte val);
520
521
522 public abstract boolean isMax(byte val);
523
524
525 public abstract byte min();
526 }
527
528
529
530
531
532 @VisibleForTesting
533 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
534 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
535
536
537
538
539
540
541
542
543 byte[] result =
544 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
545 if (reverse && length > fuzzyKeyBytes.length) {
546
547 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
548 result[i] = (byte) 0xFF;
549 }
550 }
551 int toInc = -1;
552 final Order order = Order.orderFor(reverse);
553
554 boolean increased = false;
555 for (int i = 0; i < result.length; i++) {
556 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
557 result[i] = row[offset + i];
558 if (!order.isMax(row[offset + i])) {
559
560 toInc = i;
561 }
562 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1
563 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
564
565
566 increased = true;
567 break;
568 }
569
570 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
571
572
573
574 break;
575 }
576 }
577 }
578
579 if (!increased) {
580 if (toInc < 0) {
581 return null;
582 }
583 result[toInc] = order.inc(result[toInc]);
584
585
586
587 for (int i = toInc + 1; i < result.length; i++) {
588 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
589 result[i] = order.min();
590 }
591 }
592 }
593
594 return reverse? result: trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
595 }
596
597
598
599
600
601
602
603
604
605
606
607
608
609 private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
610 int off = fuzzyKeyMeta.length >= result.length? result.length -1:
611 fuzzyKeyMeta.length -1;
612 for( ; off >= 0; off--){
613 if(fuzzyKeyMeta[off] != 0) break;
614 }
615 if (off < toInc) off = toInc;
616 byte[] retValue = new byte[off+1];
617 System.arraycopy(result, 0, retValue, 0, retValue.length);
618 return retValue;
619 }
620
621
622
623
624
625 boolean areSerializedFieldsEqual(Filter o) {
626 if (o == this) return true;
627 if (!(o instanceof FuzzyRowFilter)) return false;
628
629 FuzzyRowFilter other = (FuzzyRowFilter) o;
630 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
631 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
632 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
633 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
634 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
635 thisData.getSecond(), otherData.getSecond()))) {
636 return false;
637 }
638 }
639 return true;
640 }
641 }