(refs #251)Remove BOM from UTF-8 string.

This commit is contained in:
takezoe
2014-02-01 07:08:03 +09:00
parent 1c529eea3d
commit e87c69f989
3 changed files with 14 additions and 5 deletions

View File

@@ -3,6 +3,8 @@ package util
import java.net.{URLDecoder, URLEncoder}
import org.mozilla.universalchardet.UniversalDetector
import util.ControlUtil._
import org.apache.commons.io.input.BOMInputStream
import org.apache.commons.io.IOUtils
object StringUtil {
@@ -27,7 +29,12 @@ object StringUtil {
def escapeHtml(value: String): String =
value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
def convertFromByteArray(content: Array[Byte]): String = new String(content, detectEncoding(content))
/**
* Make string from byte array. Character encoding is detected automatically by [[util.StringUtil.detectEncoding]].
* And if given bytes contains UTF-8 BOM, it's removed from returned string..
*/
def convertFromByteArray(content: Array[Byte]): String =
IOUtils.toString(new BOMInputStream(new java.io.ByteArrayInputStream(content)), detectEncoding(content))
def detectEncoding(content: Array[Byte]): String =
defining(new UniversalDetector(null)){ detector =>