StringUtil.java
/*******************************************************************************
* Copyhacked (H) 2012-2020.
* This program and the accompanying materials
* are made available under no term at all, use it like
* you want, but share and discuss about it
* every time possible with every body.
*
* Contributors:
* ron190 at ymail dot com - initial implementation
******************************************************************************/
package com.jsql.util;
import com.jsql.util.bruter.Base16;
import com.jsql.util.bruter.Base58;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base32;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mozilla.universalchardet.UniversalDetector;
import java.io.*;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
 * Utility class gathering String conversions: HTML entities, hexadecimal,
 * Base16/32/58/64, URL encoding, gzip compression, charset detection and
 * SQL clean-up.
 */
public final class StringUtil {

    /**
     * Log4j logger sent to view.
     */
    private static final Logger LOGGER = LogManager.getRootLogger();

    /**
     * Highest code point that is copied as is by {@link #encode(String)};
     * everything above is written as a numeric reference.
     */
    private static final int MAX_ASCII = 127;

    // Schema of conversion to decimal html entities: "&#" + decimal code point + ";"
    private static final CharEncoder DECIMAL_HTML_ENCODER = new CharEncoder("&#", ";", 10);

    /**
     * Schema used to encode one character into a specialized representation
     * made of a prefix, the character number written in a given radix, and a suffix.
     */
    private static final class CharEncoder {

        private final String prefix;
        private final String suffix;
        private final int radix;

        /**
         * @param prefix text written before the number
         * @param suffix text written after the number
         * @param radix base used to write the number
         */
        CharEncoder(String prefix, String suffix, int radix) {
            this.prefix = prefix;
            this.suffix = suffix;
            this.radix = radix;
        }

        /**
         * Append the representation of a character to a buffer.
         * @param codePoint Unicode code point to encode
         * @param buff buffer receiving prefix, number and suffix
         */
        void encode(int codePoint, StringBuilder buff) {
            buff.append(this.prefix);
            buff.append(Integer.toString(codePoint, this.radix));
            buff.append(this.suffix);
        }
    }

    private StringUtil() {
        // Utility class
    }

    /**
     * Convert special characters like Chinese and Arabic letters to the corresponding html entities.
     * Same as {@code decimalHtmlEncode(text, false)}.
     * @param text string to encode
     * @return string encoded in html entities
     */
    public static String decimalHtmlEncode(String text) {
        return StringUtil.decimalHtmlEncode(text, false);
    }

    /**
     * Convert non-ASCII characters to decimal html entities and optionally
     * escape the result once more.
     * When {@code isRaw} is true, {@code <} and {@code >} are replaced by
     * {@code &lt;} and {@code &gt;}, then every {@code &} (those of the numeric
     * references and those just produced) is replaced by {@code &amp;}.
     * NOTE(review): the replacement literals of the previous revision were identical
     * to their targets, which made the flag a no-op; they are restored here as
     * entities in the same call order - confirm against callers.
     * @param text string to encode
     * @param isRaw true to also escape the markup characters of the result
     * @return string encoded in html entities
     */
    public static String decimalHtmlEncode(String text, boolean isRaw) {
        var encoded = StringUtil.encode(text);
        if (!isRaw) {
            return encoded;
        }
        return encoded
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("&", "&amp;");
    }

    /**
     * Convert every non-ASCII character of a text to a decimal html entity.
     * The text is read code point by code point so that a character stored as a
     * surrogate pair produces a single numeric reference.
     *
     * @param text string to encode
     * @return string representation using the encoder schema
     */
    private static String encode(String text) {
        var buff = new StringBuilder(text.length());
        text.codePoints().forEach(codePoint -> {
            if (codePoint > MAX_ASCII) {
                StringUtil.DECIMAL_HTML_ENCODER.encode(codePoint, buff);
            } else {
                buff.append((char) codePoint);
            }
        });
        return buff.toString();
    }

    /**
     * Convert a hexadecimal String to String.
     * Digits are read two by two and the resulting bytes are decoded as UTF-8;
     * a trailing unpaired digit is ignored.
     * @param hex Hexadecimal String to convert
     * @return The string converted from hex
     * @throws NumberFormatException when a pair of characters is not a hexadecimal number
     */
    public static String hexstr(String hex) {
        var bytes = new byte[hex.length() / 2];
        for (var i = 0 ; i < bytes.length ; i++) {
            var offset = 2 * i;
            bytes[i] = (byte) Integer.parseInt(hex.substring(offset, offset + 2), 16);
        }
        return new String(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Tell whether the charset detector reports a charset for a text.
     * Despite the name, the detected charset is not compared with UTF-8:
     * any detected charset gives true.
     * @param text string to analyze, may be null
     * @return true when a charset is detected, false for null or when nothing is detected
     */
    public static boolean isUtf8(String text) {
        return text != null && StringUtil.detectCharset(text) != null;
    }

    /**
     * Return a text decoded again from its UTF-8 bytes when the charset detector
     * reports a charset, otherwise return the text unchanged.
     * @param text string to analyze, may be null
     * @return empty string for null, else the text
     */
    public static String detectUtf8(String text) {
        if (text == null) {
            return org.apache.commons.lang3.StringUtils.EMPTY;
        }
        if (StringUtil.detectCharset(text) == null) {
            return text;
        }
        return new String(text.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8);
    }

    /**
     * Feed the UTF-8 bytes of a text to the charset detector.
     * The whole byte array is submitted: the length expected by handleData()
     * is a number of bytes, not a number of characters.
     * @param text non null string to analyze
     * @return name of the detected charset, or null when nothing is detected or the detector fails
     */
    private static String detectCharset(String text) {
        byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
        // ArrayIndexOutOfBoundsException on handleData()
        try {
            var detector = new UniversalDetector(null);
            detector.handleData(bytes, 0, bytes.length);
            detector.dataEnd();
            return detector.getDetectedCharset();
        } catch (ArrayIndexOutOfBoundsException e) {
            LOGGER.log(LogLevelUtil.CONSOLE_JAVA, e, e);
            return null;
        }
    }

    /**
     * Adapter method for base32 encode.
     * @param s String whose UTF-8 bytes are encoded
     * @return Base32 encoded string
     */
    public static String base32Encode(String s) {
        return new Base32().encodeToString(StringUtils.getBytesUtf8(s));
    }

    /**
     * Adapter method for base32 decode.
     * @param s Base32 string to decode
     * @return decoded bytes read as UTF-8
     */
    public static String base32Decode(String s) {
        return StringUtils.newStringUtf8(new Base32().decode(s));
    }

    /**
     * Adapter method for base58 encode.
     * @param s String whose UTF-8 bytes are encoded
     * @return Base58 encoded string
     */
    public static String base58Encode(String s) {
        return Base58.encode(StringUtils.getBytesUtf8(s));
    }

    /**
     * Adapter method for base58 decode.
     * @param s Base58 string to decode
     * @return decoded bytes read as UTF-8
     */
    public static String base58Decode(String s) {
        return StringUtils.newStringUtf8(Base58.decode(s));
    }

    /**
     * Adapter method for base16 encode.
     * @param s String whose UTF-8 bytes are encoded
     * @return Base16 encoded string
     */
    public static String base16Encode(String s) {
        return new Base16().encodeToString(StringUtils.getBytesUtf8(s));
    }

    /**
     * Adapter method for base16 decode.
     * @param s Base16 string to decode
     * @return decoded bytes read as UTF-8
     */
    public static String base16Decode(String s) {
        return StringUtils.newStringUtf8(new Base16().decode(s));
    }

    /**
     * Adapter method for base64 decode.
     * @param s base64 decode
     * @return Base64 decoded string
     */
    public static String base64Decode(String s) {
        // org.apache.commons.codec.binary.Base64 fails on RlRQIHVzZXI6IG
        // Use java.util.Base64 instead
        byte[] decoded = Base64.getDecoder().decode(s);
        return StringUtils.newStringUtf8(decoded);
    }

    /**
     * Adapter method for base64 encode.
     * @param s String to base64 encode
     * @return Base64 encoded string
     */
    public static String base64Encode(String s) {
        // java.util.Base64 is used on both sides, see base64Decode()
        byte[] raw = StringUtils.getBytesUtf8(s);
        return Base64.getEncoder().encodeToString(raw);
    }

    /**
     * Zip a string.
     * The text is encoded as UTF-8 and gzipped; each resulting byte is then mapped
     * to one character (ISO-8859-1) so the binary data can be carried in a String.
     * @param str Text to zip
     * @return Zipped string, or the parameter itself when null or empty
     * @throws IOException when gzip compression fails
     */
    public static String compress(String str) throws IOException {
        if (org.apache.commons.lang3.StringUtils.isEmpty(str)) {
            return str;
        }
        var out = new ByteArrayOutputStream();
        // Closing the gzip stream writes the trailer, it must happen before reading the buffer
        try (var gzip = new GZIPOutputStream(out)) {
            gzip.write(str.getBytes(StandardCharsets.UTF_8));
        }
        return out.toString(StandardCharsets.ISO_8859_1);
    }

    /**
     * Unzip a String produced by {@link #compress(String)}.
     * Each character is mapped back to one byte (ISO-8859-1), the bytes are
     * gunzipped and the result is decoded as UTF-8, the charset used by
     * compress(), so that non-ASCII text survives a round trip.
     * @param str String to unzip
     * @return String unzipped, or the parameter itself when null or empty
     * @throws IOException when the data is not valid gzip
     */
    public static String decompress(String str) throws IOException {
        if (org.apache.commons.lang3.StringUtils.isEmpty(str)) {
            return str;
        }
        byte[] zipped = str.getBytes(StandardCharsets.ISO_8859_1);
        try (var gis = new GZIPInputStream(new ByteArrayInputStream(zipped))) {
            return new String(gis.readAllBytes(), StandardCharsets.UTF_8);
        }
    }

    /**
     * Convert the UTF-8 bytes of a text to hexadecimal.
     * @param text string to convert
     * @return hexadecimal representation
     */
    public static String toHex(String text) {
        return Hex.encodeHexString(text.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Convert a hexadecimal string to the text made of its bytes read as UTF-8.
     * @param text hexadecimal string
     * @return decoded text
     * @throws DecoderException when the parameter is not valid hexadecimal
     */
    public static String fromHex(String text) throws DecoderException {
        byte[] bytes = Hex.decodeHex(text.toCharArray());
        return new String(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Zip a text with {@link #compress(String)} then convert the UTF-8 bytes
     * of the zipped string to hexadecimal.
     * @param text string to zip and convert
     * @return hexadecimal representation of the zipped string
     * @throws IOException when compression fails
     */
    public static String toHexZip(String text) throws IOException {
        byte[] zip = StringUtil.compress(text).getBytes(StandardCharsets.UTF_8);
        return Hex.encodeHexString(zip);
    }

    /**
     * Reverse of {@link #toHexZip(String)}.
     * @param text hexadecimal representation of a zipped string
     * @return unzipped text
     * @throws IOException when decompression fails
     * @throws DecoderException when the parameter is not valid hexadecimal
     */
    public static String fromHexZip(String text) throws IOException, DecoderException {
        byte[] bytes = Hex.decodeHex(text.toCharArray());
        var zip = new String(bytes, StandardCharsets.UTF_8);
        return StringUtil.decompress(zip);
    }

    /**
     * Zip a text with {@link #compress(String)} then encode the zipped string to base64.
     * @param text string to zip and encode
     * @return base64 representation of the zipped string
     * @throws IOException when compression fails
     */
    public static String toBase64Zip(String text) throws IOException {
        var zip = StringUtil.compress(text);
        return StringUtil.base64Encode(zip);
    }

    /**
     * Reverse of {@link #toBase64Zip(String)}.
     * @param text base64 representation of a zipped string
     * @return unzipped text
     * @throws IOException when decompression fails
     */
    public static String fromBase64Zip(String text) throws IOException {
        var zip = StringUtil.base64Decode(text);
        return StringUtil.decompress(zip);
    }

    /**
     * Escape a text with escapeHtml4() then escape the result once more:
     * every {@code &} of the entities becomes {@code &amp;}, so the entities
     * themselves are still readable after the result is unescaped one time.
     * NOTE(review): the replacement literals of the previous revision were identical
     * to their targets (no-op); they are restored here as entities - confirm against callers.
     * @param text string to escape
     * @return escaped string
     */
    public static String toHtml(String text) {
        var escaped = StringEscapeUtils.escapeHtml4(text);
        return escaped
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("&", "&amp;");
    }

    /**
     * Unescape a text with unescapeHtml4() then replace the angle brackets of
     * the result by {@code &lt;} and {@code &gt;}.
     * NOTE(review): the replacement literals of the previous revision were identical
     * to their targets (no-op); they are restored here as entities - confirm against callers.
     * @param text string to unescape
     * @return unescaped string with angle brackets written as entities
     */
    public static String fromHtml(String text) {
        var unescaped = StringEscapeUtils.unescapeHtml4(text);
        return unescaped
            .replace("<", "&lt;")
            .replace(">", "&gt;");
    }

    /**
     * Encode a text for use in a URL, using UTF-8.
     * @param text string to encode
     * @return URL encoded string
     */
    public static String toUrl(String text) {
        return URLEncoder.encode(text, StandardCharsets.UTF_8);
    }

    /**
     * Decode a URL encoded text, using UTF-8.
     * @param text string to decode
     * @return decoded string
     */
    public static String fromUrl(String text) {
        return URLDecoder.decode(text, StandardCharsets.UTF_8);
    }

    /**
     * Compact a SQL query: comments are removed by {@link #removeSqlComment(String)},
     * whitespace around punctuation is dropped and remaining whitespace runs
     * are collapsed to one space.
     * @param query SQL query to clean
     * @return compacted query
     */
    public static String cleanSql(String query) {
        return StringUtil.removeSqlComment(query)
            .replaceAll("(?s)([^\\s\\w])(\\s+)", "$1")  // Remove whitespace following a character that is neither whitespace nor word character
            .replaceAll("(?s)(\\s+)([^\\s\\w])", "$2")  // Remove whitespace preceding such a character
            .replaceAll("(?s)\\s+", " ")  // Collapse each remaining run of whitespace to one space
            .trim();
    }

    /**
     * Remove SQL comments except tamper /**\/ /*!...*\/
     * Negative lookahead: don't match tamper empty comment /**\/ or version comment /*!...*\/
     * JavaScript: (?!\/\*!.*\*\/|\/\*\*\/)\/\*.*\*\/
     * @param query SQL query to process
     * @return query without the matched comments
     */
    public static String removeSqlComment(String query) {
        return query.replaceAll(
            "(?s)(?!/\\*\\*/|/\\*!.*\\*/)/\\*.*?\\*/",
            org.apache.commons.lang3.StringUtils.EMPTY
        );
    }
}