mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 10:56:10 +01:00 
			
		
		
		
	Refactor parseTreeEntries, speed up tree list (#21368)
Close #20315 (fix the panic when parsing invalid input), Speed up #20231 (use ls-tree without size field) Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size)
This commit is contained in:
		| @@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||||||
| 	return parseTreeEntries(data, nil) | 	return parseTreeEntries(data, nil) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | var sepSpace = []byte{' '} | ||||||
|  |  | ||||||
| func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | ||||||
| 	entries := make([]*TreeEntry, 0, 10) | 	var err error | ||||||
|  | 	entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1) | ||||||
| 	for pos := 0; pos < len(data); { | 	for pos := 0; pos < len(data); { | ||||||
| 		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>" | 		// expect line to be of the form: | ||||||
|  | 		// <mode> <type> <sha> <space-padded-size>\t<filename> | ||||||
|  | 		// <mode> <type> <sha>\t<filename> | ||||||
|  | 		posEnd := bytes.IndexByte(data[pos:], '\n') | ||||||
|  | 		if posEnd == -1 { | ||||||
|  | 			posEnd = len(data) | ||||||
|  | 		} else { | ||||||
|  | 			posEnd += pos | ||||||
|  | 		} | ||||||
|  | 		line := data[pos:posEnd] | ||||||
|  | 		posTab := bytes.IndexByte(line, '\t') | ||||||
|  | 		if posTab == -1 { | ||||||
|  | 			return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line) | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		entry := new(TreeEntry) | 		entry := new(TreeEntry) | ||||||
| 		entry.ptree = ptree | 		entry.ptree = ptree | ||||||
| 		if pos+6 > len(data) { |  | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | 		entryAttrs := line[:posTab] | ||||||
|  | 		entryName := line[posTab+1:] | ||||||
|  |  | ||||||
|  | 		entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace) | ||||||
|  | 		_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type | ||||||
|  | 		entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace) | ||||||
|  | 		if len(entryAttrs) > 0 { | ||||||
|  | 			entrySize := entryAttrs // the last field is the space-padded-size | ||||||
|  | 			entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64) | ||||||
|  | 			entry.sized = true | ||||||
| 		} | 		} | ||||||
| 		switch string(data[pos : pos+6]) { |  | ||||||
|  | 		switch string(entryMode) { | ||||||
| 		case "100644": | 		case "100644": | ||||||
| 			entry.entryMode = EntryModeBlob | 			entry.entryMode = EntryModeBlob | ||||||
| 			pos += 12 // skip over "100644 blob " |  | ||||||
| 		case "100755": | 		case "100755": | ||||||
| 			entry.entryMode = EntryModeExec | 			entry.entryMode = EntryModeExec | ||||||
| 			pos += 12 // skip over "100755 blob " |  | ||||||
| 		case "120000": | 		case "120000": | ||||||
| 			entry.entryMode = EntryModeSymlink | 			entry.entryMode = EntryModeSymlink | ||||||
| 			pos += 12 // skip over "120000 blob " |  | ||||||
| 		case "160000": | 		case "160000": | ||||||
| 			entry.entryMode = EntryModeCommit | 			entry.entryMode = EntryModeCommit | ||||||
| 			pos += 14 // skip over "160000 object " |  | ||||||
| 		case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons | 		case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons | ||||||
| 			entry.entryMode = EntryModeTree | 			entry.entryMode = EntryModeTree | ||||||
| 			pos += 12 // skip over "040000 tree " |  | ||||||
| 		default: | 		default: | ||||||
| 			return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6])) | 			return nil, fmt.Errorf("unknown type: %v", string(entryMode)) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if pos+40 > len(data) { | 		entry.ID, err = NewIDFromString(string(entryObjectID)) | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) |  | ||||||
| 		} |  | ||||||
| 		id, err := NewIDFromString(string(data[pos : pos+40])) |  | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %v", err) | 			return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err) | ||||||
| 		} |  | ||||||
| 		entry.ID = id |  | ||||||
| 		pos += 41 // skip over sha and trailing space |  | ||||||
|  |  | ||||||
| 		end := pos + bytes.IndexByte(data[pos:], '\t') |  | ||||||
| 		if end < pos { |  | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) |  | ||||||
| 		} |  | ||||||
| 		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) |  | ||||||
| 		entry.sized = true |  | ||||||
|  |  | ||||||
| 		pos = end + 1 |  | ||||||
|  |  | ||||||
| 		end = pos + bytes.IndexByte(data[pos:], '\n') |  | ||||||
| 		if end < pos { |  | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) |  | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		// In case entry name is surrounded by double quotes(it happens only in git-shell). | 		if len(entryName) > 0 && entryName[0] == '"' { | ||||||
| 		if data[pos] == '"' { | 			entry.name, err = strconv.Unquote(string(entryName)) | ||||||
| 			entry.name, err = strconv.Unquote(string(data[pos:end])) |  | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, fmt.Errorf("Invalid ls-tree output: %v", err) | 				return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err) | ||||||
| 			} | 			} | ||||||
| 		} else { | 		} else { | ||||||
| 			entry.name = string(data[pos:end]) | 			entry.name = string(entryName) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		pos = end + 1 | 		pos = posEnd + 1 | ||||||
| 		entries = append(entries, entry) | 		entries = append(entries, entry) | ||||||
| 	} | 	} | ||||||
| 	return entries, nil | 	return entries, nil | ||||||
|   | |||||||
| @@ -12,7 +12,7 @@ import ( | |||||||
| 	"github.com/stretchr/testify/assert" | 	"github.com/stretchr/testify/assert" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| func TestParseTreeEntries(t *testing.T) { | func TestParseTreeEntriesLong(t *testing.T) { | ||||||
| 	testCases := []struct { | 	testCases := []struct { | ||||||
| 		Input    string | 		Input    string | ||||||
| 		Expected []*TreeEntry | 		Expected []*TreeEntry | ||||||
| @@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) { | |||||||
| 		assert.NoError(t, err) | 		assert.NoError(t, err) | ||||||
| 		assert.Len(t, entries, len(testCase.Expected)) | 		assert.Len(t, entries, len(testCase.Expected)) | ||||||
| 		for i, entry := range entries { | 		for i, entry := range entries { | ||||||
| 			assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) | 			assert.EqualValues(t, testCase.Expected[i], entry) | ||||||
| 			assert.EqualValues(t, testCase.Expected[i].name, entry.name) |  | ||||||
| 			assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) |  | ||||||
| 			assert.EqualValues(t, testCase.Expected[i].sized, entry.sized) |  | ||||||
| 			assert.EqualValues(t, testCase.Expected[i].size, entry.size) |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func TestParseTreeEntriesShort(t *testing.T) { | ||||||
|  | 	testCases := []struct { | ||||||
|  | 		Input    string | ||||||
|  | 		Expected []*TreeEntry | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af	README.md | ||||||
|  | 040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d	assets | ||||||
|  | `, | ||||||
|  | 			Expected: []*TreeEntry{ | ||||||
|  | 				{ | ||||||
|  | 					ID:        MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), | ||||||
|  | 					name:      "README.md", | ||||||
|  | 					entryMode: EntryModeBlob, | ||||||
|  | 				}, | ||||||
|  | 				{ | ||||||
|  | 					ID:        MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), | ||||||
|  | 					name:      "assets", | ||||||
|  | 					entryMode: EntryModeTree, | ||||||
|  | 				}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 	for _, testCase := range testCases { | ||||||
|  | 		entries, err := ParseTreeEntries([]byte(testCase.Input)) | ||||||
|  | 		assert.NoError(t, err) | ||||||
|  | 		assert.Len(t, entries, len(testCase.Expected)) | ||||||
|  | 		for i, entry := range entries { | ||||||
|  | 			assert.EqualValues(t, testCase.Expected[i], entry) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestParseTreeEntriesInvalid(t *testing.T) { | ||||||
|  | 	// there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315 | ||||||
|  | 	entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af")) | ||||||
|  | 	assert.Error(t, err) | ||||||
|  | 	assert.Len(t, entries, 0) | ||||||
|  | } | ||||||
|   | |||||||
| @@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||||||
|  |  | ||||||
| 	tree := commit.Tree | 	tree := commit.Tree | ||||||
|  |  | ||||||
| 	entries, err := tree.ListEntriesRecursive() | 	entries, err := tree.ListEntriesRecursiveWithSize() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|   | |||||||
| @@ -57,8 +57,8 @@ func (t *Tree) ListEntries() (Entries, error) { | |||||||
| 	return entries, nil | 	return entries, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // ListEntriesRecursive returns all entries of current tree recursively including all subtrees | // ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees | ||||||
| func (t *Tree) ListEntriesRecursive() (Entries, error) { | func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) { | ||||||
| 	if t.gogitTree == nil { | 	if t.gogitTree == nil { | ||||||
| 		err := t.loadTreeObject() | 		err := t.loadTreeObject() | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| @@ -92,3 +92,8 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { | |||||||
|  |  | ||||||
| 	return entries, nil | 	return entries, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version | ||||||
|  | func (t *Tree) ListEntriesRecursiveFast() (Entries, error) { | ||||||
|  | 	return t.ListEntriesRecursiveWithSize() | ||||||
|  | } | ||||||
|   | |||||||
| @@ -99,13 +99,16 @@ func (t *Tree) ListEntries() (Entries, error) { | |||||||
| 	return t.entries, err | 	return t.entries, err | ||||||
| } | } | ||||||
|  |  | ||||||
| // ListEntriesRecursive returns all entries of current tree recursively including all subtrees | // listEntriesRecursive returns all entries of current tree recursively including all subtrees | ||||||
| func (t *Tree) ListEntriesRecursive() (Entries, error) { | // extraArgs could be "-l" to get the size, which is slower | ||||||
|  | func (t *Tree) listEntriesRecursive(extraArgs ...string) (Entries, error) { | ||||||
| 	if t.entriesRecursiveParsed { | 	if t.entriesRecursiveParsed { | ||||||
| 		return t.entriesRecursive, nil | 		return t.entriesRecursive, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path}) | 	args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...) | ||||||
|  | 	args = append(args, t.ID.String()) | ||||||
|  | 	stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path}) | ||||||
| 	if runErr != nil { | 	if runErr != nil { | ||||||
| 		return nil, runErr | 		return nil, runErr | ||||||
| 	} | 	} | ||||||
| @@ -118,3 +121,13 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { | |||||||
|  |  | ||||||
| 	return t.entriesRecursive, err | 	return t.entriesRecursive, err | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size | ||||||
|  | func (t *Tree) ListEntriesRecursiveFast() (Entries, error) { | ||||||
|  | 	return t.listEntriesRecursive() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size | ||||||
|  | func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) { | ||||||
|  | 	return t.listEntriesRecursive("--long") | ||||||
|  | } | ||||||
|   | |||||||
| @@ -22,9 +22,9 @@ func TreeList(ctx *context.Context) { | |||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	entries, err := tree.ListEntriesRecursive() | 	entries, err := tree.ListEntriesRecursiveFast() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		ctx.ServerError("ListEntriesRecursive", err) | 		ctx.ServerError("ListEntriesRecursiveFast", err) | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 	entries.CustomSort(base.NaturalSortLess) | 	entries.CustomSort(base.NaturalSortLess) | ||||||
|   | |||||||
| @@ -29,7 +29,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git | |||||||
| 	tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA) | 	tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA) | ||||||
| 	var entries git.Entries | 	var entries git.Entries | ||||||
| 	if recursive { | 	if recursive { | ||||||
| 		entries, err = gitTree.ListEntriesRecursive() | 		entries, err = gitTree.ListEntriesRecursiveWithSize() | ||||||
| 	} else { | 	} else { | ||||||
| 		entries, err = gitTree.ListEntries() | 		entries, err = gitTree.ListEntries() | ||||||
| 	} | 	} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user