Add encoding detection

This commit is contained in:
odz
2013-08-24 00:54:40 +09:00
parent 6d76e93ede
commit 13578dcee8
4 changed files with 20 additions and 7 deletions

View File

@@ -2,7 +2,7 @@ package app
import util.Directory._ import util.Directory._
import util.Implicits._ import util.Implicits._
import _root_.util.{ReferrerAuthenticator, JGitUtil, FileUtil} import _root_.util.{ReferrerAuthenticator, JGitUtil, FileUtil, StringUtil}
import service._ import service._
import org.scalatra._ import org.scalatra._
import java.io.File import java.io.File
@@ -106,7 +106,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
val content = if(viewer == "other"){ val content = if(viewer == "other"){
if(bytes.isDefined && FileUtil.isText(bytes.get)){ if(bytes.isDefined && FileUtil.isText(bytes.get)){
// text // text
JGitUtil.ContentInfo("text", bytes.map(new String(_, "UTF-8"))) JGitUtil.ContentInfo("text", bytes.map(StringUtil.convertFromByteArray))
} else { } else {
// binary // binary
JGitUtil.ContentInfo("binary", None) JGitUtil.ContentInfo("binary", None)
@@ -243,7 +243,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
val files = JGitUtil.getFileList(git, revision, path) val files = JGitUtil.getFileList(git, revision, path)
// process README.md // process README.md
val readme = files.find(_.name == "README.md").map { file => val readme = files.find(_.name == "README.md").map { file =>
new String(JGitUtil.getContent(Git.open(getRepositoryDir(repository.owner, repository.name)), file.id, true).get, "UTF-8") StringUtil.convertFromByteArray(JGitUtil.getContent(Git.open(getRepositoryDir(repository.owner, repository.name)), file.id, true).get)
} }
repo.html.files(revision, repository, repo.html.files(revision, repository,

View File

@@ -64,7 +64,7 @@ trait RepositorySearchService { self: IssuesService =>
if(treeWalk.getFileMode(0) != FileMode.TREE){ if(treeWalk.getFileMode(0) != FileMode.TREE){
JGitUtil.getContent(git, treeWalk.getObjectId(0), false).foreach { bytes => JGitUtil.getContent(git, treeWalk.getObjectId(0), false).foreach { bytes =>
if(FileUtil.isText(bytes)){ if(FileUtil.isText(bytes)){
val text = new String(bytes, "UTF-8") val text = StringUtil.convertFromByteArray(bytes)
val lowerText = text.toLowerCase val lowerText = text.toLowerCase
val indices = keywords.map(lowerText.indexOf _) val indices = keywords.map(lowerText.indexOf _)
if(!indices.exists(_ < 0)){ if(!indices.exists(_ < 0)){

View File

@@ -2,6 +2,7 @@ package util
import org.eclipse.jgit.api.Git import org.eclipse.jgit.api.Git
import util.Directory._ import util.Directory._
import util.StringUtil._
import scala.collection.JavaConverters._ import scala.collection.JavaConverters._
import org.eclipse.jgit.lib._ import org.eclipse.jgit.lib._
import org.eclipse.jgit.revwalk._ import org.eclipse.jgit.revwalk._
@@ -414,7 +415,7 @@ object JGitUtil {
DiffInfo(ChangeType.ADD, null, walk.getPathString, None, None) DiffInfo(ChangeType.ADD, null, walk.getPathString, None, None)
} else { } else {
DiffInfo(ChangeType.ADD, null, walk.getPathString, None, DiffInfo(ChangeType.ADD, null, walk.getPathString, None,
JGitUtil.getContent(git, walk.getObjectId(0), false).filter(FileUtil.isText).map(new String(_, "UTF-8"))) JGitUtil.getContent(git, walk.getObjectId(0), false).filter(FileUtil.isText).map(convertFromByteArray))
})) }))
} }
walk.release walk.release
@@ -436,8 +437,8 @@ object JGitUtil {
DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath, None, None) DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath, None, None)
} else { } else {
DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath, DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath,
JGitUtil.getContent(git, diff.getOldId.toObjectId, false).filter(FileUtil.isText).map(new String(_, "UTF-8")), JGitUtil.getContent(git, diff.getOldId.toObjectId, false).filter(FileUtil.isText).map(convertFromByteArray),
JGitUtil.getContent(git, diff.getNewId.toObjectId, false).filter(FileUtil.isText).map(new String(_, "UTF-8"))) JGitUtil.getContent(git, diff.getNewId.toObjectId, false).filter(FileUtil.isText).map(convertFromByteArray))
} }
}.toList }.toList
} }

View File

@@ -1,6 +1,7 @@
package util package util
import java.net.{URLDecoder, URLEncoder} import java.net.{URLDecoder, URLEncoder}
import org.mozilla.universalchardet.UniversalDetector
object StringUtil { object StringUtil {
@@ -25,4 +26,15 @@ object StringUtil {
def escapeHtml(value: String): String = def escapeHtml(value: String): String =
value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;") value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
/**
 * Decodes a byte array into a String.
 *
 * The charset name is obtained from `detectEncoding` for the same bytes.
 *
 * @param content raw bytes to decode
 * @return the decoded text
 */
def convertFromByteArray(content: Array[Byte]): String = {
  val charsetName = detectEncoding(content)
  new String(content, charsetName)
}
/**
 * Guesses the character encoding of a byte array using `UniversalDetector`.
 *
 * @param content raw bytes to inspect
 * @return the detected charset name when the detector reports one and the running JVM
 *         supports it; "UTF-8" otherwise
 */
def detectEncoding(content: Array[Byte]): String = {
  val detector = new UniversalDetector(null)
  detector.handleData(content, 0, content.length)
  detector.dataEnd()
  // A null result means no charset was reported; Option(...) turns that into None.
  // The reported name is only returned if this JVM can decode it, because
  // new String(bytes, name) throws UnsupportedEncodingException for unknown names.
  // Try guards against Charset.isSupported rejecting a syntactically illegal name.
  Option(detector.getDetectedCharset)
    .filter(name => scala.util.Try(java.nio.charset.Charset.isSupported(name)).getOrElse(false))
    .getOrElse("UTF-8")
}
} }