/*******************************************************************************
 * Copyhacked (H) 2012-2020.
 * This program and the accompanying materials
 * are made available under no term at all, use it like
 * you want, but share and discuss about it
 * every time possible with every body.
 * 
 * Contributors:
 *      ron190 at ymail dot com - initial implementation
 ******************************************************************************/
package com.jsql.util;

import com.jsql.util.bruter.Base16;
import com.jsql.util.bruter.Base58;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base32;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mozilla.universalchardet.UniversalDetector;

import java.io.*;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

32  /**
33   * Utility class adding String operations like join() which are not
34   * part of standard JVM.
35   */
36  public final class StringUtil {
37      
38      /**
39       * Log4j logger sent to view.
40       */
41      private static final Logger LOGGER = LogManager.getRootLogger();
42      
43      // Define the schema of conversion to html entities
44      private static final CharEncoder DECIMAL_HTML_ENCODER = new CharEncoder("&#", ";", 10);
45      
46      /**
47       * This utility class defines a schema used to encode a text into a specialized
48       * representation
49       */
50      private static class CharEncoder {
51          
52          private final String prefix;
53          private final String suffix;
54          private final int radix;
55          
56          public CharEncoder(String prefix, String suffix, int radix) {
57              
58              this.prefix = prefix;
59              this.suffix = suffix;
60              this.radix = radix;
61          }
62          
63          protected void encode(char c, StringBuilder buff) {
64              buff
65              .append(this.prefix)
66              .append(Integer.toString(c, this.radix))
67              .append(this.suffix);
68          }
69      }
70  
71      private StringUtil() {
72          // Utility class
73      }
74      
75      /**
76       * Convert special characters like Chinese and Arabic letters to the corresponding html entities.
77       * @param text string to encode
78       * @return string encoded in html entities
79       */
80      public static String decimalHtmlEncode(String text) {
81          return decimalHtmlEncode(text, false);
82      }
83      
84      public static String decimalHtmlEncode(String text, boolean isRaw) {
85          
86          var result = StringUtil.encode(text);
87          
88          if (isRaw) {
89              return result
90                  .replace("<", "&lt;")
91                  .replace(">", "&gt;")
92                  .replace("&", "&amp;");
93          } else {
94              return result;
95          }
96      }
97      
98      /**
99       * Non trivial methods to convert unicode characters to html entities.
100      *
101      * @param text string to encode
102      * @return string representation using the encoder schema
103      */
104     private static String encode(String text) {
105         
106         var buff = new StringBuilder();
107         
108         for (var i = 0 ; i < text.length() ; i++) {
109             if (text.charAt(i) > 128) {
110                 StringUtil.DECIMAL_HTML_ENCODER.encode(text.charAt(i), buff);
111             } else {
112                 buff.append(text.charAt(i));
113             }
114         }
115         
116         return buff.toString();
117     }
118 
119     /**
120      * Convert a hexadecimal String to String.
121      * @param hex Hexadecimal String to convert
122      * @return The string converted from hex
123      */
124     public static String hexstr(String hex) {
125         
126         var bytes = new byte[hex.length() / 2];
127         
128         for (var i = 0 ; i < bytes.length ; i++) {
129             bytes[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16);
130         }
131         
132         return new String(bytes, StandardCharsets.UTF_8);
133     }
134     
135     public static boolean isUtf8(String text) {
136         
137         if (text == null) {
138             return false;
139         }
140         
141         var detector = new UniversalDetector(null);
142         detector.handleData(text.getBytes(StandardCharsets.UTF_8), 0, text.length() - 1);
143         detector.dataEnd();
144         String encoding = detector.getDetectedCharset();
145         
146         return encoding != null;
147     }
148     
149     public static String detectUtf8(String text) {
150         
151         if (text == null) {
152             return org.apache.commons.lang3.StringUtils.EMPTY;
153         }
154         
155         String encoding = null;
156         
157         // ArrayIndexOutOfBoundsException on handleData()
158         try {
159             var detector = new UniversalDetector(null);
160             detector.handleData(text.getBytes(StandardCharsets.UTF_8), 0, text.length() - 1);
161             detector.dataEnd();
162             encoding = detector.getDetectedCharset();
163             
164         } catch (ArrayIndexOutOfBoundsException e) {
165             LOGGER.log(LogLevelUtil.CONSOLE_JAVA, e, e);
166         }
167         
168         String result = text;
169         if (encoding != null) {
170             result = new String(text.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8);
171         }
172         
173         return result;
174     }
175     
176     public static String base32Encode(String s) {
177         
178         var base32 = new Base32();
179         return base32.encodeToString(StringUtils.getBytesUtf8(s));
180     }
181     
182     public static String base32Decode(String s) {
183         
184         var base32 = new Base32();
185         return StringUtils.newStringUtf8(base32.decode(s));
186     }
187     
188     public static String base58Encode(String s) {
189         return Base58.encode(StringUtils.getBytesUtf8(s));
190     }
191     
192     public static String base58Decode(String s) {
193         return StringUtils.newStringUtf8(Base58.decode(s));
194     }
195     
196     public static String base16Encode(String s) {
197         
198         var base16 = new Base16();
199         return base16.encodeToString(StringUtils.getBytesUtf8(s));
200     }
201     
202     public static String base16Decode(String s) {
203         
204         var base16 = new Base16();
205         return StringUtils.newStringUtf8(base16.decode(s));
206     }
207 
208     /**
209      * Adapter method for base64 decode.
210      * @param s base64 decode
211      * @return Base64 decoded string
212      */
213     public static String base64Decode(String s) {
214         // org.apache.commons.codec.binary.Base64 fails on RlRQIHVzZXI6IG
215         // Use java.util.Base64 instead
216         return StringUtils.newStringUtf8(Base64.getDecoder().decode(s));
217     }
218 
219     /**
220      * Adapter method for base64 encode.
221      * @param s String to base64 encode
222      * @return Base64 encoded string
223      */
224     public static String base64Encode(String s) {
225         // org.apache.commons.codec.binary.Base64 fails on RlRQIHVzZXI6IG
226         // Use java.util.Base64 instead
227         return Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(s));
228     }
229 
230     /**
231      * Zip a string.
232      * @param str Text to zip
233      * @return Zipped string
234      * @throws IOException
235      */
236     public static String compress(String str) throws IOException {
237         
238         if (org.apache.commons.lang3.StringUtils.isEmpty(str)) {
239             return str;
240         }
241         
242         var out = new ByteArrayOutputStream();
243         var gzip = new GZIPOutputStream(out);
244         gzip.write(str.getBytes(StandardCharsets.UTF_8));
245         gzip.close();
246         
247         return out.toString(StandardCharsets.ISO_8859_1);
248     }
249 
250     /**
251      * Unzip a String encoded from base64 or hexadecimal.
252      * @param str String to unzip
253      * @return String unzipped
254      * @throws IOException
255      */
256     public static String decompress(String str) throws IOException {
257         
258         if (org.apache.commons.lang3.StringUtils.isEmpty(str)) {
259             return str;
260         }
261         
262         final var encode = "ISO-8859-1";
263         var gis = new GZIPInputStream(new ByteArrayInputStream(str.getBytes(encode)));
264         var bf = new BufferedReader(new InputStreamReader(gis, encode));
265 
266         var buff = new char[1024];
267         int read;
268         var response = new StringBuilder();
269         
270         while ((read = bf.read(buff)) != -1) {
271             response.append(buff, 0, read);
272         }
273         
274         return response.toString();
275     }
276     
277     public static String toHex(String text) {
278         return Hex.encodeHexString(text.getBytes(StandardCharsets.UTF_8)).trim();
279     }
280     
281     public static String fromHex(String text) throws DecoderException {
282         
283         byte[] hex = Hex.decodeHex(text.toCharArray());
284         
285         return new String(hex, StandardCharsets.UTF_8);
286     }
287     
288     public static String toHexZip(String text) throws IOException {
289         
290         byte[] zip = StringUtil.compress(text).getBytes(StandardCharsets.UTF_8);
291         
292         return Hex.encodeHexString(zip).trim();
293     }
294     
295     public static String fromHexZip(String text) throws IOException, DecoderException {
296         
297         byte[] hex = Hex.decodeHex(text.toCharArray());
298         var zip = new String(hex, StandardCharsets.UTF_8);
299         
300         return StringUtil.decompress(zip);
301     }
302     
303     public static String toBase64Zip(String text) throws IOException {
304         return StringUtil.base64Encode(StringUtil.compress(text));
305     }
306     
307     public static String fromBase64Zip(String text) throws IOException {
308         return StringUtil.decompress(StringUtil.base64Decode(text));
309     }
310     
311     public static String toHtml(String text) {
312         return StringEscapeUtils.escapeHtml4(text)
313             .replace("<", "&lt;")
314             .replace(">", "&gt;")
315             .replace("&", "&amp;");
316     }
317     
318     public static String fromHtml(String text) {
319         return StringEscapeUtils.unescapeHtml4(text)
320             .replace("<", "&lt;")
321             .replace(">", "&gt;");
322     }
323     
324     public static String toUrl(String text) {
325         return URLEncoder.encode(text, StandardCharsets.UTF_8);
326     }
327     
328     public static String fromUrl(String text) {
329         return URLDecoder.decode(text, StandardCharsets.UTF_8);
330     }
331     
332     public static String cleanSql(String query) {
333         return removeSqlComment(query)
334             .replaceAll("(?s)([^\\s\\w])(\\s+)", "$1")  // Remove spaces after a word
335             .replaceAll("(?s)(\\s+)([^\\s\\w])", "$2")  // Remove spaces before a word
336             .replaceAll("(?s)\\s+", " ")  // Replace spaces
337             .trim();
338     }
339 
340     /**
341      * Remove SQL comments except tamper /**\/ /*!...*\/
342      * Negative lookahead: don't match tamper empty comment /**\/ or version comment /*!...*\/
343      * JavaScript: (?!\/\*!.*\*\/|\/\*\*\/)\/\*.*\*\/
344      */
345     public static String removeSqlComment(String query) {
346         return query.replaceAll(
347             "(?s)(?!/\\*\\*/|/\\*!.*\\*/)/\\*.*?\\*/",
348             org.apache.commons.lang3.StringUtils.EMPTY
349         );
350     }
351 }