View Javadoc

1   /*
2    * Copyright (c) 2011.  The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.hbql.filter;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.filter.BinaryComparator;
27  import org.apache.hadoop.hbase.filter.CompareFilter;
28  import org.apache.hadoop.hbase.filter.WritableByteArrayComparable;
29  import org.apache.hadoop.hbase.io.HbaseObjectWritable;
30  import org.apache.hadoop.hbase.util.Bytes;
31  
32  import java.io.DataInput;
33  import java.io.DataOutput;
34  import java.io.IOException;
35  import java.util.Arrays;
36  
37  
38  /**
39   * This filter is used to filter cells based on value. It takes a
40   * {@link org.apache.hadoop.hbase.filter.CompareFilter.CompareOp}
41   * operator (equal, greater, not equal, etc), and either a byte [] value or
42   * a {@link org.apache.hadoop.hbase.filter.WritableByteArrayComparable}.
43   * <p/>
44   * If we have a byte [] value then we just do a lexicographic compare. For
45   * example, if passed value is 'b' and cell has 'a' and the compare operator
46   * is LESS, then we will filter out this cell (return true).  If this is not
47   * sufficient (eg you want to deserialize a long and then compare it to a fixed
48   * long value), then you can pass in your own comparator instead.
49   * <p/>
50   * You must also specify a family and qualifier.  Only the value of this column
51   * will be tested.
52   * <p/>
53   * To prevent the entire row from being emitted if the column is not found
54   * on a row, use {@link #setFilterIfMissing}.
55   * Otherwise, if the column is found, the entire row will be emitted only if
56   * the value passes.  If the value fails, the row will be filtered out.
57   * <p/>
58   * In order to test values of previous versions (timestamps), set
59   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
60   * only the latest version's value is tested and all previous versions are ignored.
61   * <p/>
62   * To filter based on the value of all scanned columns, use {@link org.apache.hadoop.hbase.filter.ValueFilter}.
63   */
64  public class SingleColumnValueFilter extends InstrumentedFilter {
65      static final Log LOG = LogFactory.getLog(SingleColumnValueFilter.class);
66  
67      private boolean verbose = false;
68      private byte[]                      columnFamily;
69      private byte[]                      columnQualifier;
70      private CompareFilter.CompareOp     compareOp;
71      private WritableByteArrayComparable comparator;
72      private boolean foundColumn       = false;
73      private boolean matchedColumn     = false;
74      private boolean filterIfMissing   = false;
75      private boolean latestVersionOnly = true;
76  
77      /**
78       * Writable constructor, do not use.
79       */
80      public SingleColumnValueFilter() {
81      }
82  
83      /**
84       * Constructor for binary compare of the value of a single column.  If the
85       * column is found and the condition passes, all columns of the row will be
86       * emitted.  If the column is not found or the condition fails, the row will
87       * not be emitted.
88       *
89       * @param family    name of column family
90       * @param qualifier name of column qualifier
91       * @param compareOp operator
92       * @param value     value to compare column values against
93       */
94      public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
95                                     final CompareFilter.CompareOp compareOp, final byte[] value) {
96          this(family, qualifier, compareOp, new BinaryComparator(value));
97      }
98  
99      /**
100      * Constructor for binary compare of the value of a single column.  If the
101      * column is found and the condition passes, all columns of the row will be
102      * emitted.  If the condition fails, the row will not be emitted.
103      * <p/>
104      * Use the filterIfColumnMissing flag to set whether the rest of the columns
105      * in a row will be emitted if the specified column to check is not found in
106      * the row.
107      *
108      * @param family     name of column family
109      * @param qualifier  name of column qualifier
110      * @param compareOp  operator
111      * @param comparator Comparator to use.
112      */
113     public SingleColumnValueFilter(final byte[] family, final byte[] qualifier,
114                                    final CompareFilter.CompareOp compareOp, final WritableByteArrayComparable comparator) {
115         this.columnFamily = family;
116         this.columnQualifier = qualifier;
117         this.compareOp = compareOp;
118         this.comparator = comparator;
119     }
120 
121     public void setVerbose(final boolean verbose) {
122         this.verbose = verbose;
123     }
124 
125     public boolean getVerbose() {
126         return this.verbose;
127     }
128 
129     public boolean filterRowKey(byte[] rowKey, int offset, int length) {
130         // We don't filter on the row key... we filter later on column value so
131         // always return false.
132         return false;
133     }
134 
135     public ReturnCode filterKeyValue(KeyValue keyValue) {
136         // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
137         if (this.matchedColumn) {
138             // We already found and matched the single column, all keys now pass
139             return ReturnCode.INCLUDE;
140         }
141         else if (this.latestVersionOnly && this.foundColumn) {
142             // We found but did not match the single column, skip to next row
143             return ReturnCode.NEXT_ROW;
144         }
145         if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) {
146             return ReturnCode.INCLUDE;
147         }
148         foundColumn = true;
149         if (filterColumnValue(keyValue.getBuffer(),
150                               keyValue.getValueOffset(), keyValue.getValueLength())) {
151             return this.latestVersionOnly ? ReturnCode.NEXT_ROW : ReturnCode.INCLUDE;
152         }
153         this.matchedColumn = true;
154         return ReturnCode.INCLUDE;
155     }
156 
157     private boolean filterColumnValue(final byte[] data, final int offset,
158                                       final int length) {
159         // TODO: Can this filter take a rawcomparator so don't have to make this
160         // byte array copy?
161         int compareResult = this.comparator.compareTo(Arrays.copyOfRange(data, offset, offset + length));
162 
163         if (this.getVerbose())
164             LOG.debug("compareResult=" + compareResult + " " + Bytes.toString(data, offset, length));
165 
166         switch (this.compareOp) {
167             case LESS:
168                 return compareResult <= 0;
169             case LESS_OR_EQUAL:
170                 return compareResult < 0;
171             case EQUAL:
172                 return compareResult != 0;
173             case NOT_EQUAL:
174                 return compareResult == 0;
175             case GREATER_OR_EQUAL:
176                 return compareResult > 0;
177             case GREATER:
178                 return compareResult >= 0;
179             default:
180                 throw new RuntimeException("Unknown Compare op " + compareOp.name());
181         }
182     }
183 
184     public boolean filterAllRemaining() {
185         return false;
186     }
187 
188     public boolean filterRow() {
189         // If column was found, return false if it was matched, true if it was not
190         // If column not found, return true if we filter if missing, false if not
191         return this.foundColumn ? !this.matchedColumn : this.filterIfMissing;
192     }
193 
194     public void reset() {
195         foundColumn = false;
196         matchedColumn = false;
197     }
198 
199     /**
200      * Get whether entire row should be filtered if column is not found.
201      *
202      * @return true if row should be skipped if column not found, false if row
203      *         should be let through anyways
204      */
205     public boolean getFilterIfMissing() {
206         return filterIfMissing;
207     }
208 
209     /**
210      * Set whether entire row should be filtered if column is not found.
211      * <p/>
212      * If true, the entire row will be skipped if the column is not found.
213      * <p/>
214      * If false, the row will pass if the column is not found.  This is default.
215      */
216     public void setFilterIfMissing(boolean filterIfMissing) {
217         this.filterIfMissing = filterIfMissing;
218     }
219 
220     /**
221      * Get whether only the latest version of the column value should be compared.
222      * If true, the row will be returned if only the latest version of the column
223      * value matches. If false, the row will be returned if any version of the
224      * column value matches. The default is true.
225      */
226     public boolean getLatestVersionOnly() {
227         return latestVersionOnly;
228     }
229 
230     /**
231      * Set whether only the latest version of the column value should be compared.
232      * If true, the row will be returned if only the latest version of the column
233      * value matches. If false, the row will be returned if any version of the
234      * column value matches. The default is true.
235      */
236     public void setLatestVersionOnly(boolean latestVersionOnly) {
237         this.latestVersionOnly = latestVersionOnly;
238     }
239 
240     public void readFields(final DataInput in) throws IOException {
241 
242         this.verbose = in.readBoolean();
243 
244         this.columnFamily = Bytes.readByteArray(in);
245         if (this.columnFamily.length == 0) {
246             this.columnFamily = null;
247         }
248         this.columnQualifier = Bytes.readByteArray(in);
249         if (this.columnQualifier.length == 0) {
250             this.columnQualifier = null;
251         }
252         this.compareOp = CompareFilter.CompareOp.valueOf(in.readUTF());
253         this.comparator =
254                 (WritableByteArrayComparable)HbaseObjectWritable.readObject(in, null);
255         this.foundColumn = in.readBoolean();
256         this.matchedColumn = in.readBoolean();
257         this.filterIfMissing = in.readBoolean();
258         this.latestVersionOnly = in.readBoolean();
259     }
260 
261     public void write(final DataOutput out) throws IOException {
262 
263         out.writeBoolean(this.getVerbose());
264 
265         Bytes.writeByteArray(out, this.columnFamily);
266         Bytes.writeByteArray(out, this.columnQualifier);
267         out.writeUTF(compareOp.name());
268         HbaseObjectWritable.writeObject(out, comparator,
269                                         WritableByteArrayComparable.class, null);
270         out.writeBoolean(foundColumn);
271         out.writeBoolean(matchedColumn);
272         out.writeBoolean(filterIfMissing);
273         out.writeBoolean(latestVersionOnly);
274     }
275 }