/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package com.jsql.util.bruter;
19  
20  import org.apache.commons.codec.binary.Base32;
21  
22  import java.util.Base64;
23  
24  /**
25   * Provides Base16 encoding and decoding.
26   *
27   * <p>
28   * This class is thread-safe.
29   * </p>
30   * <p>
31   * This implementation strictly follows RFC 4648, and as such unlike
32   * the {@link Base32} and {@link Base64} implementations,
33   * it does not ignore invalid alphabet characters or whitespace,
34   * neither does it offer chunking or padding characters.
35   * </p>
36   * <p>
37   * The only additional feature above those specified in RFC 4648
38   * is support for working with a lower-case alphabet in addition
39   * to the default upper-case alphabet.
40   * </p>
41   *
42   * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
43   *
44   * @since 1.15
45   */
46  public class Base16 extends BaseNCodec {
47  
48      /**
49       * BASE16 characters are 4 bits in length.
50       * They are formed by taking an 8-bit group,
51       * which is converted into two BASE16 characters.
52       */
53      private static final int BITS_PER_ENCODED_BYTE = 4;
54      private static final int BYTES_PER_ENCODED_BLOCK = 2;
55      private static final int BYTES_PER_UNENCODED_BLOCK = 1;
56  
57      /**
58       * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
59       * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
60       * alphabet but fall within the bounds of the array are translated to -1.
61       */
62      private static final byte[] UPPER_CASE_DECODE_TABLE = {
63          //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
64          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
65          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
66          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
67           0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
68          -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
69      };
70  
71      /**
72       * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
73       * equivalents as specified in Table 5 of RFC 4648.
74       */
75      private static final byte[] UPPER_CASE_ENCODE_TABLE = {
76          '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
77          'A', 'B', 'C', 'D', 'E', 'F'
78      };
79  
80      /**
81       * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet"
82       * into their 4-bit positive integer equivalents. Characters that are not in the Base16
83       * alphabet but fall within the bounds of the array are translated to -1.
84       */
85      private static final byte[] LOWER_CASE_DECODE_TABLE = {
86          //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
87          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
88          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
89          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
90           0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
91          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
92          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
93          -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
94      };
95  
96      /**
97       * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
98       * lower-case equivalents.
99       */
100     private static final byte[] LOWER_CASE_ENCODE_TABLE = {
101         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
102         'a', 'b', 'c', 'd', 'e', 'f'
103     };
104 
105     /** Mask used to extract 4 bits, used when decoding character. */
106     private static final int MASK_4BITS = 0x0f;
107 
108     /**
109      * Decode table to use.
110      */
111     private final byte[] decodeTable;
112 
113     /**
114      * Encode table to use.
115      */
116     private final byte[] encodeTable;
117 
118     /**
119      * Creates a Base16 codec used for decoding and encoding.
120      */
121     public Base16() {
122         this(false);
123     }
124 
125     /**
126      * Creates a Base16 codec used for decoding and encoding.
127      *
128      * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
129      */
130     public Base16(final boolean lowerCase) {
131         this(lowerCase, DECODING_POLICY_DEFAULT);
132     }
133 
134     /**
135      * Creates a Base16 codec used for decoding and encoding.
136      *
137      * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
138      * @param decodingPolicy Decoding policy.
139      */
140     public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
141         
142         super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, PAD_DEFAULT, decodingPolicy);
143         
144         if (lowerCase) {
145             
146             this.encodeTable = LOWER_CASE_ENCODE_TABLE;
147             this.decodeTable = LOWER_CASE_DECODE_TABLE;
148             
149         } else {
150             
151             this.encodeTable = UPPER_CASE_ENCODE_TABLE;
152             this.decodeTable = UPPER_CASE_DECODE_TABLE;
153         }
154     }
155 
156     @Override
157     public void decode(final byte[] data, int offsetInput, final int length, final Context context) {
158         
159         int offset = offsetInput;
160         
161         if (context.eof || length < 0) {
162             
163             context.eof = true;
164             if (context.ibitWorkArea != 0) {
165                 this.validateTrailingCharacter();
166             }
167             
168             return;
169         }
170 
171         final int dataLen = Math.min(data.length - offset, length);
172         final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
173 
174         // small optimisation to short-cut the rest of this method when it is fed byte-by-byte
175         if (availableChars == 1 && availableChars == dataLen) {
176             
177             context.ibitWorkArea = this.decodeOctet(data[offset]) + 1;   // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
178             return;
179         }
180 
181         // we must have an even number of chars to decode
182         final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
183 
184         final byte[] buffer = this.ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
185 
186         int result;
187         var i = 0;
188         if (dataLen < availableChars) {
189             
190             // we have 1/2 byte from previous invocation to decode
191             result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
192             result |= this.decodeOctet(data[offset++]);
193             i = 2;
194 
195             buffer[context.pos++] = (byte)result;
196 
197             // reset to empty-value for next invocation!
198             context.ibitWorkArea = 0;
199         }
200 
201         while (i < charsToProcess) {
202             
203             result = this.decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
204             result |= this.decodeOctet(data[offset++]);
205             i += 2;
206             buffer[context.pos++] = (byte)result;
207         }
208 
209         // we have one char of a hex-pair left over
210         if (i < dataLen) {
211             context.ibitWorkArea = this.decodeOctet(data[i]) + 1;   // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
212         }
213     }
214 
215     private int decodeOctet(final byte octet) {
216         
217         int decoded = -1;
218         if ((octet & 0xff) < this.decodeTable.length) {
219             decoded = this.decodeTable[octet];
220         }
221 
222         if (decoded == -1) {
223             throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet);
224         }
225 
226         return decoded;
227     }
228 
229     @Override
230     public void encode(final byte[] data, final int offset, final int length, final Context context) {
231         
232         if (context.eof) {
233             return;
234         }
235 
236         if (length < 0) {
237             
238             context.eof = true;
239             return;
240         }
241 
242         final int size = length * BYTES_PER_ENCODED_BLOCK;
243         if (size < 0) {
244             throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
245         }
246 
247         final byte[] buffer = this.ensureBufferSize(size, context);
248 
249         final int end = offset + length;
250         for (int i = offset; i < end; i++) {
251             
252             final int value = data[i];
253             final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
254             final int low = value & MASK_4BITS;
255             buffer[context.pos++] = this.encodeTable[high];
256             buffer[context.pos++] = this.encodeTable[low];
257         }
258     }
259 
260     /**
261      * Returns whether the {@code octet} is in the Base16 alphabet.
262      *
263      * @param octet The value to test.
264      *
265      * @return {@code true} if the value is defined in the the Base16 alphabet {@code false} otherwise.
266      */
267     @Override
268     public boolean isInAlphabet(final byte octet) {
269         return (octet & 0xff) < this.decodeTable.length && this.decodeTable[octet] != -1;
270     }
271 
272     /**
273      * Validates whether decoding allows an entire final trailing character that cannot be
274      * used for a complete byte.
275      *
276      * @throws IllegalArgumentException if strict decoding is enabled
277      */
278     private void validateTrailingCharacter() {
279         if (this.isStrictDecoding()) {
280             throw new IllegalArgumentException(
281                 "Strict decoding: Last encoded character is a valid base 16 alphabet" +
282                 "character but not a possible encoding. " +
283                 "Decoding requires at least two characters to create one byte."
284             );
285         }
286     }
287 }
288