From 7190519fb33d26a55548398aa1a56aa4b4612a3c Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sun, 14 Dec 2025 18:40:55 +0800 Subject: [PATCH] Fix code highlighting on blame page (#36157) 1. Full file highlighting (fix the legacy todo "we should instead highlight the whole file at once") * Fix #24383 2. Correctly covert file content encoding 3. Remove dead code, split large for-loop into small functions/blocks to make code maintainable --- modules/charset/escape.go | 7 +- modules/highlight/highlight.go | 34 +++++++- modules/highlight/highlight_test.go | 18 +++++ routers/web/repo/blame.go | 120 +++++++++++++--------------- services/gitdiff/gitdiff.go | 32 +------- templates/repo/blame.tmpl | 2 +- web_src/css/base.css | 2 +- 7 files changed, 116 insertions(+), 99 deletions(-) diff --git a/modules/charset/escape.go b/modules/charset/escape.go index 92e417d1f7..167683a298 100644 --- a/modules/charset/escape.go +++ b/modules/charset/escape.go @@ -20,14 +20,17 @@ import ( // RuneNBSP is the codepoint for NBSP const RuneNBSP = 0xa0 -// EscapeControlHTML escapes the unicode control sequences in a provided html document +// EscapeControlHTML escapes the Unicode control sequences in a provided html document func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) { + if !setting.UI.AmbiguousUnicodeDetection { + return &EscapeStatus{}, html + } sb := &strings.Builder{} escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader return escaped, template.HTML(sb.String()) } -// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus +// EscapeControlReader escapes the Unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) { if !setting.UI.AmbiguousUnicodeDetection { _, err = io.Copy(writer, reader) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 77e47fdf48..2b13e9c4ce 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -56,7 +56,39 @@ func NewContext() { }) } -// Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name +// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags +// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags +// The '\n' is necessary for copying code from web UI to preserve original code lines +// ATTENTION: It uses the unsafe conversion between string and []byte for performance reason +// DO NOT make any modification to the returned [][]byte slice items +func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) { + buf := util.UnsafeStringToBytes(string(code)) + lineCount := bytes.Count(buf, []byte("\n")) + 1 + ret = make([][]byte, 0, lineCount) + nlTagClose := []byte("\n 0 { + ret = append(ret, buf) + } + return ret + } + // Chroma highlighting output sometimes have "" right after \n, sometimes before. + // * "text\n" + // * "text\n" + if bytes.HasPrefix(buf[pos:], nlTagClose) { + pos1 := bytes.IndexByte(buf[pos:], '>') + if pos1 != -1 { + pos += pos1 + } + } + ret = append(ret, buf[:pos+1]) + buf = buf[pos+1:] + } +} + +// Code returns an HTML version of code string with chroma syntax highlighting classes and the matched lexer name func Code(fileName, language, code string) (output template.HTML, lexerName string) { NewContext() diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index b36de98c5c..52873427a8 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -181,3 +181,21 @@ c=2`), }) } } + +func TestUnsafeSplitHighlightedLines(t *testing.T) { + ret := UnsafeSplitHighlightedLines("") + assert.Empty(t, ret) + + ret = UnsafeSplitHighlightedLines("a") + assert.Len(t, ret, 1) + assert.Equal(t, "a", string(ret[0])) + + ret = UnsafeSplitHighlightedLines("\n") + assert.Len(t, ret, 1) + assert.Equal(t, "\n", string(ret[0])) + + ret = UnsafeSplitHighlightedLines("a\nb\n") + assert.Len(t, ret, 2) + assert.Equal(t, "a\n", string(ret[0])) + assert.Equal(t, "b\n", string(ret[1])) +} diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 0eebff6aa8..6a4618a3c7 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -4,8 +4,9 @@ package repo import ( + "bytes" "fmt" - gotemplate "html/template" + "html/template" "net/http" "net/url" "path" @@ -25,18 +26,17 @@ import ( ) type blameRow struct { - RowNumber int - Avatar gotemplate.HTML - RepoLink string - PartSha string + RowNumber int + + Avatar template.HTML PreviousSha string PreviousShaURL string - IsFirstCommit bool CommitURL string CommitMessage string - CommitSince gotemplate.HTML - Code gotemplate.HTML - EscapeStatus *charset.EscapeStatus + CommitSince template.HTML + + Code template.HTML + EscapeStatus *charset.EscapeStatus } // RefBlame render blame page @@ -220,76 +220,64 @@ func processBlameParts(ctx *context.Context, blameParts []*git.BlamePart) map[st return commitNames } -func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) { - repoLink := ctx.Repo.RepoLink +func renderBlameFillFirstBlameRow(repoLink string, avatarUtils *templates.AvatarUtils, part *git.BlamePart, commit *user_model.UserCommit, br *blameRow) { + if commit.User != nil { + br.Avatar = avatarUtils.Avatar(commit.User, 18) + } else { + br.Avatar = avatarUtils.AvatarByEmail(commit.Author.Email, commit.Author.Name, 18) + } + br.PreviousSha = part.PreviousSha + br.PreviousShaURL = fmt.Sprintf("%s/blame/commit/%s/%s", repoLink, url.PathEscape(part.PreviousSha), util.PathEscapeSegments(part.PreviousPath)) + br.CommitURL = fmt.Sprintf("%s/commit/%s", repoLink, url.PathEscape(part.Sha)) + br.CommitMessage = commit.CommitMessage + br.CommitSince = templates.TimeSince(commit.Author.When) +} + +func renderBlame(ctx *context.Context, blameParts []*git.BlamePart, commitNames map[string]*user_model.UserCommit) { language, err := languagestats.GetFileLanguage(ctx, ctx.Repo.GitRepo, ctx.Repo.CommitID, ctx.Repo.TreePath) if err != nil { log.Error("Unable to get file language for %-v:%s. Error: %v", ctx.Repo.Repository, ctx.Repo.TreePath, err) } - lines := make([]string, 0) + buf := &bytes.Buffer{} rows := make([]*blameRow, 0) + avatarUtils := templates.NewAvatarUtils(ctx) + rowNumber := 0 // will be 1-based + for _, part := range blameParts { + for partLineIdx, line := range part.Lines { + rowNumber++ + + br := &blameRow{RowNumber: rowNumber} + rows = append(rows, br) + + if int64(buf.Len()) < setting.UI.MaxDisplayFileSize { + buf.WriteString(line) + buf.WriteByte('\n') + } + + if partLineIdx == 0 { + renderBlameFillFirstBlameRow(ctx.Repo.RepoLink, avatarUtils, part, commitNames[part.Sha], br) + } + } + } + escapeStatus := &charset.EscapeStatus{} - var lexerName string - - avatarUtils := templates.NewAvatarUtils(ctx) - i := 0 - commitCnt := 0 - for _, part := range blameParts { - for index, line := range part.Lines { - i++ - lines = append(lines, line) - - br := &blameRow{ - RowNumber: i, - } - - commit := commitNames[part.Sha] - if index == 0 { - // Count commit number - commitCnt++ - - // User avatar image - commitSince := templates.TimeSince(commit.Author.When) - - var avatar string - if commit.User != nil { - avatar = string(avatarUtils.Avatar(commit.User, 18)) - } else { - avatar = string(avatarUtils.AvatarByEmail(commit.Author.Email, commit.Author.Name, 18, "tw-mr-2")) - } - - br.Avatar = gotemplate.HTML(avatar) - br.RepoLink = repoLink - br.PartSha = part.Sha - br.PreviousSha = part.PreviousSha - br.PreviousShaURL = fmt.Sprintf("%s/blame/commit/%s/%s", repoLink, url.PathEscape(part.PreviousSha), util.PathEscapeSegments(part.PreviousPath)) - br.CommitURL = fmt.Sprintf("%s/commit/%s", repoLink, url.PathEscape(part.Sha)) - br.CommitMessage = commit.CommitMessage - br.CommitSince = commitSince - } - - if i != len(lines)-1 { - line += "\n" - } - line, lexerNameForLine := highlight.Code(path.Base(ctx.Repo.TreePath), language, line) - - // set lexer name to the first detected lexer. this is certainly suboptimal and - // we should instead highlight the whole file at once - if lexerName == "" { - lexerName = lexerNameForLine - } - - br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale) - rows = append(rows, br) - escapeStatus = escapeStatus.Or(br.EscapeStatus) + bufContent := buf.Bytes() + bufContent = charset.ToUTF8(bufContent, charset.ConvertOpts{}) + highlighted, lexerName := highlight.Code(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent)) + unsafeLines := highlight.UnsafeSplitHighlightedLines(highlighted) + for i, br := range rows { + var line template.HTML + if i < len(rows) { + line = template.HTML(util.UnsafeBytesToString(unsafeLines[i])) } + br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale) + escapeStatus = escapeStatus.Or(br.EscapeStatus) } ctx.Data["EscapeStatus"] = escapeStatus ctx.Data["BlameRows"] = rows - ctx.Data["CommitCnt"] = commitCnt ctx.Data["LexerName"] = lexerName } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index f8fde6ab29..34e94671a2 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -1336,35 +1336,11 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit return diff, nil } -func splitHighlightLines(buf []byte) (ret [][]byte) { - lineCount := bytes.Count(buf, []byte("\n")) + 1 - ret = make([][]byte, 0, lineCount) - nlTagClose := []byte("\n" right after \n, sometimes before. - // * "text\n" - // * "text\n" - if bytes.HasPrefix(buf[pos:], nlTagClose) { - pos1 := bytes.IndexByte(buf[pos:], '>') - if pos1 != -1 { - pos += pos1 - } - } - ret = append(ret, buf[:pos+1]) - buf = buf[pos+1:] - } -} - func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML { content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{})) highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content) - splitLines := splitHighlightLines([]byte(highlightedNewContent)) - lines := make(map[int]template.HTML, len(splitLines)) + unsafeLines := highlight.UnsafeSplitHighlightedLines(highlightedNewContent) + lines := make(map[int]template.HTML, len(unsafeLines)) // only save the highlighted lines we need, but not the whole file, to save memory for _, sec := range diffFile.Sections { for _, ln := range sec.Lines { @@ -1374,8 +1350,8 @@ func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[ } if lineIdx >= 1 { idx := lineIdx - 1 - if idx < len(splitLines) { - lines[idx] = template.HTML(splitLines[idx]) + if idx < len(unsafeLines) { + lines[idx] = template.HTML(util.UnsafeBytesToString(unsafeLines[idx])) } } } diff --git a/templates/repo/blame.tmpl b/templates/repo/blame.tmpl index c4d9f0741f..9cd4b2a122 100644 --- a/templates/repo/blame.tmpl +++ b/templates/repo/blame.tmpl @@ -38,7 +38,7 @@ {{range $row := .BlameRows}} - +
diff --git a/web_src/css/base.css b/web_src/css/base.css index 0e690a0265..36b3d118ae 100644 --- a/web_src/css/base.css +++ b/web_src/css/base.css @@ -919,7 +919,7 @@ overflow-menu .ui.label { .blame-avatar { display: flex; align-items: center; - margin-right: 4px; + margin-right: 6px; } tr.top-line-blame {