Fix code highlighting on blame page (#36157)

1. Full file highlighting (fix the legacy todo "we should instead
highlight the whole file at once")
    * Fix #24383
2. Correctly convert file content encoding
3. Remove dead code, split large for-loop into small functions/blocks to
make code maintainable
This commit is contained in:
wxiaoguang
2025-12-14 18:40:55 +08:00
committed by GitHub
parent 1f5237e0d7
commit 7190519fb3
7 changed files with 116 additions and 99 deletions

View File

@@ -20,14 +20,17 @@ import (
// RuneNBSP is the codepoint for NBSP // RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0 const RuneNBSP = 0xa0
// EscapeControlHTML escapes the unicode control sequences in a provided html document // EscapeControlHTML escapes the Unicode control sequences in a provided html document
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
if !setting.UI.AmbiguousUnicodeDetection {
return &EscapeStatus{}, html
}
sb := &strings.Builder{} sb := &strings.Builder{}
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
return escaped, template.HTML(sb.String()) return escaped, template.HTML(sb.String())
} }
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus // EscapeControlReader escapes the Unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
if !setting.UI.AmbiguousUnicodeDetection { if !setting.UI.AmbiguousUnicodeDetection {
_, err = io.Copy(writer, reader) _, err = io.Copy(writer, reader)

View File

@@ -56,7 +56,39 @@ func NewContext() {
}) })
} }
// Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name // UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
// The '\n' is necessary for copying code from web UI to preserve original code lines
// ATTENTION: It uses the unsafe conversion between string and []byte for performance reasons
// DO NOT make any modification to the returned [][]byte slice items
func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
	// Reinterpret the highlighted HTML as bytes via the unsafe helper; every
	// returned item is a sub-slice of this buffer and must be treated as read-only.
	rest := util.UnsafeStringToBytes(string(code))
	closeAfterNL := []byte("\n</")

	// One slot per newline plus one for a possible unterminated last line.
	ret = make([][]byte, 0, bytes.Count(rest, []byte("\n"))+1)

	for len(rest) > 0 {
		nl := bytes.IndexByte(rest, '\n')
		if nl < 0 {
			// Remaining content has no newline: it is the final line.
			return append(ret, rest)
		}

		// Chroma emits the closing tag on either side of the newline:
		//   "<span>text</span>\n"
		//   "<span>text\n</span>"
		// For the second form, extend the line up to the closing '>' so the
		// tag stays attached to the line it terminates.
		end := nl
		if bytes.HasPrefix(rest[nl:], closeAfterNL) {
			if gt := bytes.IndexByte(rest[nl:], '>'); gt >= 0 {
				end = nl + gt
			}
		}

		ret = append(ret, rest[:end+1])
		rest = rest[end+1:]
	}
	return ret
}
// Code returns an HTML version of code string with chroma syntax highlighting classes and the matched lexer name
func Code(fileName, language, code string) (output template.HTML, lexerName string) { func Code(fileName, language, code string) (output template.HTML, lexerName string) {
NewContext() NewContext()

View File

@@ -181,3 +181,21 @@ c=2`),
}) })
} }
} }
func TestUnsafeSplitHighlightedLines(t *testing.T) {
ret := UnsafeSplitHighlightedLines("")
assert.Empty(t, ret)
ret = UnsafeSplitHighlightedLines("a")
assert.Len(t, ret, 1)
assert.Equal(t, "a", string(ret[0]))
ret = UnsafeSplitHighlightedLines("\n")
assert.Len(t, ret, 1)
assert.Equal(t, "\n", string(ret[0]))
ret = UnsafeSplitHighlightedLines("<span>a</span>\n<span>b\n</span>")
assert.Len(t, ret, 2)
assert.Equal(t, "<span>a</span>\n", string(ret[0]))
assert.Equal(t, "<span>b\n</span>", string(ret[1]))
}

View File

@@ -4,8 +4,9 @@
package repo package repo
import ( import (
"bytes"
"fmt" "fmt"
gotemplate "html/template" "html/template"
"net/http" "net/http"
"net/url" "net/url"
"path" "path"
@@ -26,16 +27,15 @@ import (
type blameRow struct { type blameRow struct {
RowNumber int RowNumber int
Avatar gotemplate.HTML
RepoLink string Avatar template.HTML
PartSha string
PreviousSha string PreviousSha string
PreviousShaURL string PreviousShaURL string
IsFirstCommit bool
CommitURL string CommitURL string
CommitMessage string CommitMessage string
CommitSince gotemplate.HTML CommitSince template.HTML
Code gotemplate.HTML
Code template.HTML
EscapeStatus *charset.EscapeStatus EscapeStatus *charset.EscapeStatus
} }
@@ -220,76 +220,64 @@ func processBlameParts(ctx *context.Context, blameParts []*git.BlamePart) map[st
return commitNames return commitNames
} }
func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) { func renderBlameFillFirstBlameRow(repoLink string, avatarUtils *templates.AvatarUtils, part *git.BlamePart, commit *user_model.UserCommit, br *blameRow) {
repoLink := ctx.Repo.RepoLink if commit.User != nil {
br.Avatar = avatarUtils.Avatar(commit.User, 18)
} else {
br.Avatar = avatarUtils.AvatarByEmail(commit.Author.Email, commit.Author.Name, 18)
}
br.PreviousSha = part.PreviousSha
br.PreviousShaURL = fmt.Sprintf("%s/blame/commit/%s/%s", repoLink, url.PathEscape(part.PreviousSha), util.PathEscapeSegments(part.PreviousPath))
br.CommitURL = fmt.Sprintf("%s/commit/%s", repoLink, url.PathEscape(part.Sha))
br.CommitMessage = commit.CommitMessage
br.CommitSince = templates.TimeSince(commit.Author.When)
}
func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) {
language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath)
if err != nil { if err != nil {
log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err) log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err)
} }
lines := make([]string, 0) buf := &bytes.Buffer{}
rows := make([]*blameRow, 0) rows := make([]*blameRow, 0)
avatarUtils := templates.NewAvatarUtils(ctx)
rowNumber := 0 // will be 1-based
for _, part := range blameParts {
for partLineIdx, line := range part.Lines {
rowNumber++
br := &blameRow{RowNumber: rowNumber}
rows = append(rows, br)
if int64(buf.Len()) < setting.UI.MaxDisplayFileSize {
buf.WriteString(line)
buf.WriteByte('\n')
}
if partLineIdx == 0 {
renderBlameFillFirstBlameRow(ctx.Repo.RepoLink, avatarUtils, part, commitNames[part.Sha], br)
}
}
}
escapeStatus := &charset.EscapeStatus{} escapeStatus := &charset.EscapeStatus{}
var lexerName string bufContent := buf.Bytes()
bufContent = charset.ToUTF8(bufContent, charset.ConvertOpts{})
avatarUtils := templates.NewAvatarUtils(ctx) highlighted, lexerName := highlight.Code(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent))
i := 0 unsafeLines := highlight.UnsafeSplitHighlightedLines(highlighted)
commitCnt := 0 for i, br := range rows {
for _, part := range blameParts { var line template.HTML
for index, line := range part.Lines { if i < len(unsafeLines) {
i++ line = template.HTML(util.UnsafeBytesToString(unsafeLines[i]))
lines = append(lines, line)
br := &blameRow{
RowNumber: i,
} }
commit := commitNames[part.Sha]
if index == 0 {
// Count commit number
commitCnt++
// User avatar image
commitSince := templates.TimeSince(commit.Author.When)
var avatar string
if commit.User != nil {
avatar = string(avatarUtils.Avatar(commit.User, 18))
} else {
avatar = string(avatarUtils.AvatarByEmail(commit.Author.Email, commit.Author.Name, 18, "tw-mr-2"))
}
br.Avatar = gotemplate.HTML(avatar)
br.RepoLink = repoLink
br.PartSha = part.Sha
br.PreviousSha = part.PreviousSha
br.PreviousShaURL = fmt.Sprintf("%s/blame/commit/%s/%s", repoLink, url.PathEscape(part.PreviousSha), util.PathEscapeSegments(part.PreviousPath))
br.CommitURL = fmt.Sprintf("%s/commit/%s", repoLink, url.PathEscape(part.Sha))
br.CommitMessage = commit.CommitMessage
br.CommitSince = commitSince
}
if i != len(lines)-1 {
line += "\n"
}
line, lexerNameForLine := highlight.Code(path.Base(ctx.Repo.TreePath), language, line)
// set lexer name to the first detected lexer. this is certainly suboptimal and
// we should instead highlight the whole file at once
if lexerName == "" {
lexerName = lexerNameForLine
}
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale) br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale)
rows = append(rows, br)
escapeStatus = escapeStatus.Or(br.EscapeStatus) escapeStatus = escapeStatus.Or(br.EscapeStatus)
} }
}
ctx.Data["EscapeStatus"] = escapeStatus ctx.Data["EscapeStatus"] = escapeStatus
ctx.Data["BlameRows"] = rows ctx.Data["BlameRows"] = rows
ctx.Data["CommitCnt"] = commitCnt
ctx.Data["LexerName"] = lexerName ctx.Data["LexerName"] = lexerName
} }

View File

@@ -1336,35 +1336,11 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
return diff, nil return diff, nil
} }
// splitHighlightLines cuts highlighted HTML into per-line chunks, each chunk
// keeping its '\n'. When a closing tag immediately follows the newline
// ("<span>text\n</span>"), the chunk is extended to include that tag.
// The remainder after the last newline is always appended, even when empty,
// and every chunk is a sub-slice of buf.
func splitHighlightLines(buf []byte) (ret [][]byte) {
	closeAfterNL := []byte("\n</")
	ret = make([][]byte, 0, bytes.Count(buf, []byte("\n"))+1)

	rest := buf
	for {
		nl := bytes.IndexByte(rest, '\n')
		if nl < 0 {
			break
		}

		// Chroma emits the closing tag on either side of the newline:
		//   "<span>text</span>\n"
		//   "<span>text\n</span>"
		// For the second form, move the cut point to the closing '>'.
		end := nl
		if bytes.HasPrefix(rest[nl:], closeAfterNL) {
			if gt := bytes.IndexByte(rest[nl:], '>'); gt >= 0 {
				end = nl + gt
			}
		}

		ret = append(ret, rest[:end+1])
		rest = rest[end+1:]
	}

	// Whatever is left (possibly nothing) forms the final chunk.
	return append(ret, rest)
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML { func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML {
content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{})) content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{}))
highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content) highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
splitLines := splitHighlightLines([]byte(highlightedNewContent)) unsafeLines := highlight.UnsafeSplitHighlightedLines(highlightedNewContent)
lines := make(map[int]template.HTML, len(splitLines)) lines := make(map[int]template.HTML, len(unsafeLines))
// only save the highlighted lines we need, but not the whole file, to save memory // only save the highlighted lines we need, but not the whole file, to save memory
for _, sec := range diffFile.Sections { for _, sec := range diffFile.Sections {
for _, ln := range sec.Lines { for _, ln := range sec.Lines {
@@ -1374,8 +1350,8 @@ func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[
} }
if lineIdx >= 1 { if lineIdx >= 1 {
idx := lineIdx - 1 idx := lineIdx - 1
if idx < len(splitLines) { if idx < len(unsafeLines) {
lines[idx] = template.HTML(splitLines[idx]) lines[idx] = template.HTML(util.UnsafeBytesToString(unsafeLines[idx]))
} }
} }
} }

View File

@@ -38,7 +38,7 @@
<table> <table>
<tbody> <tbody>
{{range $row := .BlameRows}} {{range $row := .BlameRows}}
<tr class="{{if and (gt $.CommitCnt 1) ($row.CommitMessage)}}top-line-blame{{end}}"> <tr class="{{if $row.CommitURL}}top-line-blame{{end}}">
<td class="lines-commit"> <td class="lines-commit">
<div class="blame-info"> <div class="blame-info">
<div class="blame-data"> <div class="blame-data">

View File

@@ -919,7 +919,7 @@ overflow-menu .ui.label {
.blame-avatar { .blame-avatar {
display: flex; display: flex;
align-items: center; align-items: center;
margin-right: 4px; margin-right: 6px;
} }
tr.top-line-blame { tr.top-line-blame {