mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 10:56:10 +01:00 
			
		
		
		
	Refactor parseTreeEntries, speed up tree list (#21368)
Close #20315 (fix the panic when parsing invalid input). Speed up #20231 (use ls-tree without size field). Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size).
This commit is contained in:
		| @@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | ||||
| 	return parseTreeEntries(data, nil) | ||||
| } | ||||
|  | ||||
| var sepSpace = []byte{' '} | ||||
|  | ||||
| func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | ||||
| 	entries := make([]*TreeEntry, 0, 10) | ||||
| 	var err error | ||||
| 	entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1) | ||||
| 	for pos := 0; pos < len(data); { | ||||
| 		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>" | ||||
| 		// expect line to be of the form: | ||||
| 		// <mode> <type> <sha> <space-padded-size>\t<filename> | ||||
| 		// <mode> <type> <sha>\t<filename> | ||||
| 		posEnd := bytes.IndexByte(data[pos:], '\n') | ||||
| 		if posEnd == -1 { | ||||
| 			posEnd = len(data) | ||||
| 		} else { | ||||
| 			posEnd += pos | ||||
| 		} | ||||
| 		line := data[pos:posEnd] | ||||
| 		posTab := bytes.IndexByte(line, '\t') | ||||
| 		if posTab == -1 { | ||||
| 			return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line) | ||||
| 		} | ||||
|  | ||||
| 		entry := new(TreeEntry) | ||||
| 		entry.ptree = ptree | ||||
| 		if pos+6 > len(data) { | ||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | ||||
|  | ||||
| 		entryAttrs := line[:posTab] | ||||
| 		entryName := line[posTab+1:] | ||||
|  | ||||
| 		entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace) | ||||
| 		_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type | ||||
| 		entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace) | ||||
| 		if len(entryAttrs) > 0 { | ||||
| 			entrySize := entryAttrs // the last field is the space-padded-size | ||||
| 			entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64) | ||||
| 			entry.sized = true | ||||
| 		} | ||||
| 		switch string(data[pos : pos+6]) { | ||||
|  | ||||
| 		switch string(entryMode) { | ||||
| 		case "100644": | ||||
| 			entry.entryMode = EntryModeBlob | ||||
| 			pos += 12 // skip over "100644 blob " | ||||
| 		case "100755": | ||||
| 			entry.entryMode = EntryModeExec | ||||
| 			pos += 12 // skip over "100755 blob " | ||||
| 		case "120000": | ||||
| 			entry.entryMode = EntryModeSymlink | ||||
| 			pos += 12 // skip over "120000 blob " | ||||
| 		case "160000": | ||||
| 			entry.entryMode = EntryModeCommit | ||||
| 			pos += 14 // skip over "160000 object " | ||||
| 		case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons | ||||
| 			entry.entryMode = EntryModeTree | ||||
| 			pos += 12 // skip over "040000 tree " | ||||
| 		default: | ||||
| 			return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6])) | ||||
| 			return nil, fmt.Errorf("unknown type: %v", string(entryMode)) | ||||
| 		} | ||||
|  | ||||
| 		if pos+40 > len(data) { | ||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | ||||
| 		} | ||||
| 		id, err := NewIDFromString(string(data[pos : pos+40])) | ||||
| 		entry.ID, err = NewIDFromString(string(entryObjectID)) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %v", err) | ||||
| 		} | ||||
| 		entry.ID = id | ||||
| 		pos += 41 // skip over sha and trailing space | ||||
|  | ||||
| 		end := pos + bytes.IndexByte(data[pos:], '\t') | ||||
| 		if end < pos { | ||||
| 			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) | ||||
| 		} | ||||
| 		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) | ||||
| 		entry.sized = true | ||||
|  | ||||
| 		pos = end + 1 | ||||
|  | ||||
| 		end = pos + bytes.IndexByte(data[pos:], '\n') | ||||
| 		if end < pos { | ||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | ||||
| 			return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err) | ||||
| 		} | ||||
|  | ||||
| 		// In case entry name is surrounded by double quotes(it happens only in git-shell). | ||||
| 		if data[pos] == '"' { | ||||
| 			entry.name, err = strconv.Unquote(string(data[pos:end])) | ||||
| 		if len(entryName) > 0 && entryName[0] == '"' { | ||||
| 			entry.name, err = strconv.Unquote(string(entryName)) | ||||
| 			if err != nil { | ||||
| 				return nil, fmt.Errorf("Invalid ls-tree output: %v", err) | ||||
| 				return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err) | ||||
| 			} | ||||
| 		} else { | ||||
| 			entry.name = string(data[pos:end]) | ||||
| 			entry.name = string(entryName) | ||||
| 		} | ||||
|  | ||||
| 		pos = end + 1 | ||||
| 		pos = posEnd + 1 | ||||
| 		entries = append(entries, entry) | ||||
| 	} | ||||
| 	return entries, nil | ||||
|   | ||||
		Reference in New Issue
	
	Block a user