1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.omid.committable.hbase; 19 20 import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions; 21 22 import org.apache.commons.lang.ArrayUtils; 23 import org.apache.hadoop.conf.Configuration; 24 import org.apache.hadoop.hbase.util.Bytes; 25 26 import java.io.IOException; 27 import java.util.Arrays; 28 29 /** 30 * This class contains only the required behavior of the original 31 * org.apache.hadoop.hbase.util.RegionSplitter class to avoid 32 * having a reference to hbase-testing-util, which transitively 33 * imports hbase-server causing dependency conflicts for this module. 34 */ 35 public class RegionSplitter { 36 37 /** 38 * A generic interface for the RegionSplitter code to use for all it's functionality. Note that the original authors 39 * of this code use see org.apache.hadoop.hbase.util.HexStringSplit to partition their table and set it as default, but provided this for 40 * your custom algorithm. To use, create a new derived class from this interface and call 41 * see RegionSplitter#createPresplitTable or 42 * see RegionSplitter#rollingSplit(String, SplitAlgorithm, Configuration)} with the argument splitClassName 43 * giving the name of your class. 44 */ 45 public interface SplitAlgorithm { 46 47 /** 48 * Split a pre-existing region into 2 regions. 49 * 50 * @param start 51 * first row (inclusive) 52 * @param end 53 * last row (exclusive) 54 * @return the split row to use 55 */ 56 byte[] split(byte[] start, byte[] end); 57 58 /** 59 * Split an entire table. 60 * 61 * @param numRegions 62 * number of regions to split the table into 63 * 64 * @throws RuntimeException 65 * user input is validated at this time. may throw a runtime exception in response to a parse 66 * failure 67 * @return array of split keys for the initial regions of the table. The length of the returned array should be 68 * numRegions-1. 69 */ 70 byte[][] split(int numRegions); 71 72 /** 73 * In HBase, the first row is represented by an empty byte array. This might cause problems with your split 74 * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array. 75 * 76 * @return your representation of your first row 77 */ 78 byte[] firstRow(); 79 80 /** 81 * In HBase, the last row is represented by an empty byte array. This might cause problems with your split 82 * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array. 83 * 84 * @return your representation of your last row 85 */ 86 byte[] lastRow(); 87 88 /** 89 * In HBase, the last row is represented by an empty byte array. Set this value to help the split code 90 * understand how to evenly divide the first region. 91 * 92 * @param userInput 93 * raw user input (may throw RuntimeException on parse failure) 94 */ 95 void setFirstRow(String userInput); 96 97 /** 98 * In HBase, the last row is represented by an empty byte array. Set this value to help the split code 99 * understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing 100 * the same prefix. 101 * 102 * @param userInput raw user input (may throw RuntimeException on parse failure) 103 */ 104 void setLastRow(String userInput); 105 106 /** 107 * @param input 108 * user or file input for row 109 * @return byte array representation of this row for HBase 110 */ 111 byte[] strToRow(String input); 112 113 /** 114 * @param row byte array representing a row in HBase 115 * @return String to use for debug and file printing 116 */ 117 String rowToStr(byte[] row); 118 119 /** 120 * @return the separator character to use when storing / printing the row 121 */ 122 String separator(); 123 124 /** 125 * Set the first row 126 * 127 * @param userInput 128 * byte array of the row key. 129 */ 130 void setFirstRow(byte[] userInput); 131 132 /** 133 * Set the last row 134 * 135 * @param userInput 136 * byte array of the row key. 137 */ 138 void setLastRow(byte[] userInput); 139 } 140 141 /** 142 * @param conf Hbase conf 143 * @param splitClassName split class name to be used 144 * @return an instance of SplitAlgorithm 145 * @throws IOException if the specified SplitAlgorithm class couldn't be instantiated 146 */ 147 public static SplitAlgorithm newSplitAlgoInstance(Configuration conf, 148 String splitClassName) throws IOException { 149 Class<?> splitClass; 150 151 // For split algorithms builtin to RegionSplitter, the user can specify 152 // their simple class name instead of a fully qualified class name. 153 if (splitClassName.equals(UniformSplit.class.getSimpleName())) { 154 splitClass = UniformSplit.class; 155 } else { 156 try { 157 splitClass = conf.getClassByName(splitClassName); 158 } catch (ClassNotFoundException e) { 159 throw new IOException("Couldn't load split class " + splitClassName, e); 160 } 161 if (splitClass == null) { 162 throw new IOException("Failed loading split class " + splitClassName); 163 } 164 if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) { 165 throw new IOException( 166 "Specified split class doesn't implement SplitAlgorithm"); 167 } 168 } 169 try { 170 return splitClass.asSubclass(SplitAlgorithm.class).newInstance(); 171 } catch (Exception e) { 172 throw new IOException("Problem loading split algorithm: ", e); 173 } 174 } 175 176 /** 177 * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform 178 * random bytes (e.g. hashes). Rows are raw byte values in the range [00..FF] and are right-padded with zeros 179 * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space, 180 * but is not necessarily the easiest for readability. 181 */ 182 public static class UniformSplit implements SplitAlgorithm { 183 184 static final byte xFF = (byte) 0xFF; 185 byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY; 186 byte[] lastRowBytes = 187 new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF}; 188 189 public byte[] split(byte[] start, byte[] end) { 190 return Bytes.split(start, end, 1)[1]; 191 } 192 193 @Override 194 public byte[][] split(int numRegions) { 195 Preconditions.checkArgument( 196 Bytes.compareTo(lastRowBytes, firstRowBytes) > 0, 197 "last row (%s) is configured less than first row (%s)", 198 Bytes.toStringBinary(lastRowBytes), 199 Bytes.toStringBinary(firstRowBytes)); 200 201 byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true, 202 numRegions - 1); 203 Preconditions.checkState(splits != null, 204 "Could not split region with given user input: " + this); 205 206 // remove endpoints, which are included in the splits list 207 return Arrays.copyOfRange(splits, 1, splits.length - 1); 208 } 209 210 @Override 211 public byte[] firstRow() { 212 return firstRowBytes; 213 } 214 215 @Override 216 public byte[] lastRow() { 217 return lastRowBytes; 218 } 219 220 @Override 221 public void setFirstRow(String userInput) { 222 firstRowBytes = Bytes.toBytesBinary(userInput); 223 } 224 225 @Override 226 public void setLastRow(String userInput) { 227 lastRowBytes = Bytes.toBytesBinary(userInput); 228 } 229 230 @Override 231 public void setFirstRow(byte[] userInput) { 232 firstRowBytes = userInput; 233 } 234 235 @Override 236 public void setLastRow(byte[] userInput) { 237 lastRowBytes = userInput; 238 } 239 240 @Override 241 public byte[] strToRow(String input) { 242 return Bytes.toBytesBinary(input); 243 } 244 245 @Override 246 public String rowToStr(byte[] row) { 247 return Bytes.toStringBinary(row); 248 } 249 250 @Override 251 public String separator() { 252 return ","; 253 } 254 255 @Override 256 public String toString() { 257 return this.getClass().getSimpleName() + " [" + rowToStr(firstRow()) 258 + "," + rowToStr(lastRow()) + "]"; 259 } 260 } 261 }