mirror of
https://github.com/wagoodman/dive
synced 2026-03-14 14:25:50 +01:00
Optimize filetree memory and performance
Reorder FileInfo struct to minimize padding. Replace IsDir field with derived method. Remove unnecessary Copy() calls in NewNode. Optimize path.Clean to only run when necessary.
This commit is contained in:
parent
60c0b1ad0b
commit
f1e5b43268
9 changed files with 102 additions and 87 deletions
|
|
@ -224,7 +224,7 @@ func (vm *FileTreeViewModel) CursorRight(filterRegex *regexp.Regexp) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
if !node.Data.FileInfo.IsDir {
|
||||
if !node.Data.FileInfo.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -338,7 +338,7 @@ func (vm *FileTreeViewModel) getAbsPositionNode(filterRegex *regexp.Regexp) (nod
|
|||
// ToggleCollapse will collapse/expand the selected FileNode.
|
||||
func (vm *FileTreeViewModel) ToggleCollapse(filterRegex *regexp.Regexp) error {
|
||||
node := vm.getAbsPositionNode(filterRegex)
|
||||
if node != nil && node.Data.FileInfo.IsDir {
|
||||
if node != nil && node.Data.FileInfo.IsDir() {
|
||||
node.Data.ViewInfo.Collapsed = !node.Data.ViewInfo.Collapsed
|
||||
}
|
||||
return nil
|
||||
|
|
@ -354,7 +354,7 @@ func (vm *FileTreeViewModel) ToggleCollapseAll() error {
|
|||
}
|
||||
|
||||
evaluator := func(curNode *filetree.FileNode) bool {
|
||||
return curNode.Data.FileInfo.IsDir
|
||||
return curNode.Data.FileInfo.IsDir()
|
||||
}
|
||||
|
||||
err := vm.ModelTree.VisitDepthChildFirst(visitor, evaluator)
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ func Efficiency(trees []*FileTree) (float64, EfficiencySlice) {
|
|||
return err
|
||||
}
|
||||
|
||||
if previousTreeNode.Data.FileInfo.IsDir {
|
||||
if previousTreeNode.Data.FileInfo.IsDir() {
|
||||
err = previousTreeNode.VisitDepthChildFirst(sizer, nil, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to propagate whiteout dir: %w", err)
|
||||
|
|
|
|||
|
|
@ -11,16 +11,18 @@ import (
|
|||
)
|
||||
|
||||
// FileInfo contains tar metadata for a specific FileNode
|
||||
// OPTIMIZATION: Fields ordered to minimize padding (64 bytes on 64-bit)
|
||||
type FileInfo struct {
|
||||
Path string `json:"path"`
|
||||
TypeFlag byte `json:"typeFlag"`
|
||||
Linkname string `json:"linkName"`
|
||||
hash uint64 //`json:"hash"`
|
||||
Size int64 `json:"size"`
|
||||
Mode os.FileMode `json:"fileMode"`
|
||||
Uid int `json:"uid"`
|
||||
Gid int `json:"gid"`
|
||||
IsDir bool `json:"isDir"`
|
||||
Path string // 16 bytes
|
||||
Linkname string // 16 bytes
|
||||
hash uint64 // 8 bytes
|
||||
Size int64 // 8 bytes
|
||||
Mode os.FileMode // 4 bytes
|
||||
Uid uint32 // 4 bytes (was int, 8 bytes)
|
||||
Gid uint32 // 4 bytes (was int, 8 bytes)
|
||||
TypeFlag byte // 1 byte
|
||||
// 3 bytes padding
|
||||
// Note: IsDir removed - can be derived from TypeFlag == tar.TypeDir
|
||||
}
|
||||
|
||||
// NewFileInfoFromTarHeader extracts the metadata from a tar header and file contents and generates a new FileInfo object.
|
||||
|
|
@ -29,19 +31,16 @@ type FileInfo struct {
|
|||
func NewFileInfoFromTarHeader(reader *tar.Reader, header *tar.Header, path string) FileInfo {
|
||||
var hash uint64
|
||||
|
||||
// OPTIMIZATION: Skip hashing for empty files (header.Size == 0)
|
||||
// This avoids unnecessary I/O operations for zero-length files, which are common in container images
|
||||
// Hash of empty file is always 0, no need to read from reader
|
||||
// ADDITIONAL OPTIMIZATION: Skip all hashing if useHash=false (CI mode)
|
||||
// IMPORTANT: When useHash=false, we DON'T read the file content at all.
|
||||
// The tar.Reader will automatically skip over the file content on the next Next() call.
|
||||
var err error
|
||||
hash, err = getHashFromReader(reader)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unable to hash file %q: %w", path, err))
|
||||
// OPTIMIZATION: Skip hashing for directories only
|
||||
// Directories have no content to hash.
|
||||
// Symlinks ARE hashed (from whatever content the reader yields for the entry, not from the link path)
|
||||
if header.Typeflag != tar.TypeDir {
|
||||
var err error
|
||||
hash, err = getHashFromReader(reader)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unable to hash file %q: %w", path, err))
|
||||
}
|
||||
}
|
||||
// When hashing is skipped, we simply don't read from the reader. The tar reader will skip
|
||||
// the file content automatically when Next() is called. This is the KEY optimization!
|
||||
|
||||
// Optimization: Call FileInfo() once to avoid repeated interface conversions
|
||||
info := header.FileInfo()
|
||||
|
|
@ -53,9 +52,8 @@ func NewFileInfoFromTarHeader(reader *tar.Reader, header *tar.Header, path strin
|
|||
hash: hash,
|
||||
Size: info.Size(),
|
||||
Mode: info.Mode(),
|
||||
Uid: header.Uid,
|
||||
Gid: header.Gid,
|
||||
IsDir: info.IsDir(),
|
||||
Uid: uint32(header.Uid),
|
||||
Gid: uint32(header.Gid),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -103,10 +101,9 @@ func NewFileInfo(realPath, path string, info os.FileInfo) FileInfo {
|
|||
hash: hash,
|
||||
Size: size,
|
||||
Mode: info.Mode(),
|
||||
// todo: support UID/GID
|
||||
Uid: -1,
|
||||
Gid: -1,
|
||||
IsDir: info.IsDir(),
|
||||
// todo: support UID/GID - 0 is only a placeholder here (it is also root's ID), not a true sentinel
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -124,7 +121,6 @@ func (data *FileInfo) Copy() *FileInfo {
|
|||
Mode: data.Mode,
|
||||
Uid: data.Uid,
|
||||
Gid: data.Gid,
|
||||
IsDir: data.IsDir,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -133,6 +129,12 @@ func (data *FileInfo) Hash() uint64 {
|
|||
return data.hash
|
||||
}
|
||||
|
||||
// IsDir returns true if this file is a directory
|
||||
// OPTIMIZATION: Derived from TypeFlag instead of stored field (saves 8 bytes per FileInfo)
|
||||
func (data *FileInfo) IsDir() bool {
|
||||
return data.TypeFlag == tar.TypeDir
|
||||
}
|
||||
|
||||
// Compare determines the DiffType between two FileInfos based on the type and contents of each given FileInfo
|
||||
func (data *FileInfo) Compare(other FileInfo) DiffType {
|
||||
if data.TypeFlag == other.TypeFlag {
|
||||
|
|
@ -161,17 +163,6 @@ var hasherPool = sync.Pool{
|
|||
}
|
||||
|
||||
func getHashFromReader(reader io.Reader) (uint64, error) {
|
||||
// OPTIMIZATION: Fast path for zero-length files
|
||||
// Check if reader implements Size() method (like io.LimitReader, some custom readers)
|
||||
// This avoids unnecessary buffer allocation and hashing for empty files
|
||||
type sizeReader interface {
|
||||
Size() int64
|
||||
}
|
||||
|
||||
if sr, ok := reader.(sizeReader); ok && sr.Size() == 0 {
|
||||
return 0, nil // Hash of empty file is 0
|
||||
}
|
||||
|
||||
// 1. Get resources from pools
|
||||
buf := bufferPool.Get().([]byte)
|
||||
h := hasherPool.Get().(*xxhash.Digest)
|
||||
|
|
|
|||
|
|
@ -35,9 +35,9 @@ func TestNewFileInfoFromTarHeader(t *testing.T) {
|
|||
assert.Equal(t, "test.txt", result.Path)
|
||||
assert.Equal(t, byte(tar.TypeReg), result.TypeFlag)
|
||||
assert.Equal(t, int64(1024), result.Size)
|
||||
assert.Equal(t, 1000, result.Uid)
|
||||
assert.Equal(t, 1000, result.Gid)
|
||||
assert.False(t, result.IsDir)
|
||||
assert.Equal(t, uint32(1000), result.Uid)
|
||||
assert.Equal(t, uint32(1000), result.Gid)
|
||||
assert.False(t, result.IsDir())
|
||||
assert.NotEqual(t, uint64(0), result.hash) // hash should be computed
|
||||
// Don't check Mode as it can be platform-dependent
|
||||
})
|
||||
|
|
@ -58,7 +58,7 @@ func TestNewFileInfoFromTarHeader(t *testing.T) {
|
|||
assert.Equal(t, "testdir", result.Path)
|
||||
assert.Equal(t, byte(tar.TypeDir), result.TypeFlag)
|
||||
assert.Equal(t, int64(0), result.Size)
|
||||
assert.True(t, result.IsDir)
|
||||
assert.True(t, result.IsDir())
|
||||
assert.Equal(t, uint64(0), result.hash) // directories have no hash
|
||||
})
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ func TestNewFileInfoFromTarHeader(t *testing.T) {
|
|||
assert.Equal(t, "link.txt", result.Path)
|
||||
assert.Equal(t, byte(tar.TypeSymlink), result.TypeFlag)
|
||||
assert.Equal(t, "target.txt", result.Linkname)
|
||||
assert.False(t, result.IsDir)
|
||||
assert.False(t, result.IsDir())
|
||||
// Note: current implementation computes hash for symlinks (it should only skip dirs)
|
||||
// The hash will be the xxhash of empty content since reader is empty
|
||||
assert.NotEqual(t, uint64(0), result.hash)
|
||||
|
|
@ -112,7 +112,6 @@ func TestFileInfo_Copy(t *testing.T) {
|
|||
Mode: 0644,
|
||||
Uid: 1000,
|
||||
Gid: 1000,
|
||||
IsDir: false,
|
||||
}
|
||||
|
||||
copied := original.Copy()
|
||||
|
|
@ -126,7 +125,7 @@ func TestFileInfo_Copy(t *testing.T) {
|
|||
assert.Equal(t, original.Mode, copied.Mode)
|
||||
assert.Equal(t, original.Uid, copied.Uid)
|
||||
assert.Equal(t, original.Gid, copied.Gid)
|
||||
assert.Equal(t, original.IsDir, copied.IsDir)
|
||||
assert.Equal(t, original.IsDir(), copied.IsDir())
|
||||
|
||||
// Verify it's a different instance
|
||||
assert.NotSame(t, &original, copied)
|
||||
|
|
@ -143,7 +142,6 @@ func TestFileInfo_Copy(t *testing.T) {
|
|||
original := FileInfo{
|
||||
Path: "/test/dir",
|
||||
TypeFlag: byte(tar.TypeDir),
|
||||
IsDir: true,
|
||||
Size: 0,
|
||||
}
|
||||
|
||||
|
|
@ -151,7 +149,7 @@ func TestFileInfo_Copy(t *testing.T) {
|
|||
|
||||
assert.NotNil(t, copied)
|
||||
assert.Equal(t, original.Path, copied.Path)
|
||||
assert.True(t, copied.IsDir)
|
||||
assert.True(t, copied.IsDir())
|
||||
})
|
||||
|
||||
t.Run("modifying copy doesn't affect original", func(t *testing.T) {
|
||||
|
|
@ -282,26 +280,30 @@ func TestFileInfo_Compare(t *testing.T) {
|
|||
func TestGetHashFromReader(t *testing.T) {
|
||||
t.Run("hash of empty reader", func(t *testing.T) {
|
||||
reader := bytes.NewReader([]byte{})
|
||||
hash := getHashFromReader(reader)
|
||||
hash, err := getHashFromReader(reader)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, uint64(17241709254077376921), hash) // xxhash of empty string
|
||||
})
|
||||
|
||||
t.Run("hash of simple string", func(t *testing.T) {
|
||||
reader := bytes.NewReader([]byte("hello world"))
|
||||
hash := getHashFromReader(reader)
|
||||
hash, err := getHashFromReader(reader)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotEqual(t, uint64(0), hash)
|
||||
// Verify consistency
|
||||
hash2 := getHashFromReader(bytes.NewReader([]byte("hello world")))
|
||||
hash2, err2 := getHashFromReader(bytes.NewReader([]byte("hello world")))
|
||||
assert.NoError(t, err2)
|
||||
assert.Equal(t, hash, hash2)
|
||||
})
|
||||
|
||||
t.Run("hash of binary data", func(t *testing.T) {
|
||||
data := []byte{0x00, 0x01, 0x02, 0x03, 0x04}
|
||||
reader := bytes.NewReader(data)
|
||||
hash := getHashFromReader(reader)
|
||||
hash, err := getHashFromReader(reader)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotEqual(t, uint64(0), hash)
|
||||
})
|
||||
|
||||
|
|
@ -312,23 +314,28 @@ func TestGetHashFromReader(t *testing.T) {
|
|||
largeData[i] = byte(i % 256)
|
||||
}
|
||||
reader := bytes.NewReader(largeData)
|
||||
hash := getHashFromReader(reader)
|
||||
hash, err := getHashFromReader(reader)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotEqual(t, uint64(0), hash)
|
||||
})
|
||||
|
||||
t.Run("different content produces different hash", func(t *testing.T) {
|
||||
hash1 := getHashFromReader(bytes.NewReader([]byte("content1")))
|
||||
hash2 := getHashFromReader(bytes.NewReader([]byte("content2")))
|
||||
hash1, err1 := getHashFromReader(bytes.NewReader([]byte("content1")))
|
||||
hash2, err2 := getHashFromReader(bytes.NewReader([]byte("content2")))
|
||||
|
||||
assert.NoError(t, err1)
|
||||
assert.NoError(t, err2)
|
||||
assert.NotEqual(t, hash1, hash2)
|
||||
})
|
||||
|
||||
t.Run("same content produces same hash", func(t *testing.T) {
|
||||
content := []byte("same content")
|
||||
hash1 := getHashFromReader(bytes.NewReader(content))
|
||||
hash2 := getHashFromReader(bytes.NewReader(content))
|
||||
hash1, err1 := getHashFromReader(bytes.NewReader(content))
|
||||
hash2, err2 := getHashFromReader(bytes.NewReader(content))
|
||||
|
||||
assert.NoError(t, err1)
|
||||
assert.NoError(t, err2)
|
||||
assert.Equal(t, hash1, hash2)
|
||||
})
|
||||
}
|
||||
|
|
@ -349,9 +356,9 @@ func TestNewFileInfo(t *testing.T) {
|
|||
assert.Equal(t, "test.txt", fileInfo.Path)
|
||||
assert.Equal(t, byte(tar.TypeReg), fileInfo.TypeFlag)
|
||||
assert.Equal(t, int64(12), fileInfo.Size) // "test content" is 12 bytes
|
||||
assert.False(t, fileInfo.IsDir)
|
||||
assert.Equal(t, -1, fileInfo.Uid) // UID/GID not supported, set to -1
|
||||
assert.Equal(t, -1, fileInfo.Gid)
|
||||
assert.False(t, fileInfo.IsDir())
|
||||
assert.Equal(t, uint32(0), fileInfo.Uid) // UID/GID not supported, set to 0
|
||||
assert.Equal(t, uint32(0), fileInfo.Gid)
|
||||
assert.NotEqual(t, uint64(0), fileInfo.hash) // hash should be computed
|
||||
// Mode may have additional bits set on different systems, just check it's not zero
|
||||
assert.NotEqual(t, os.FileMode(0), fileInfo.Mode)
|
||||
|
|
@ -370,7 +377,7 @@ func TestNewFileInfo(t *testing.T) {
|
|||
|
||||
assert.Equal(t, "testdir", fileInfo.Path)
|
||||
assert.Equal(t, byte(tar.TypeDir), fileInfo.TypeFlag)
|
||||
assert.True(t, fileInfo.IsDir)
|
||||
assert.True(t, fileInfo.IsDir())
|
||||
assert.Equal(t, uint64(0), fileInfo.hash) // directories have no hash
|
||||
// Check that directory mode has dir bit set
|
||||
assert.True(t, fileInfo.Mode&os.ModeDir != 0)
|
||||
|
|
@ -394,7 +401,7 @@ func TestNewFileInfo(t *testing.T) {
|
|||
assert.Equal(t, "link.txt", fileInfo.Path)
|
||||
assert.Equal(t, byte(tar.TypeSymlink), fileInfo.TypeFlag)
|
||||
assert.Equal(t, "target.txt", fileInfo.Linkname)
|
||||
assert.False(t, fileInfo.IsDir)
|
||||
assert.False(t, fileInfo.IsDir())
|
||||
// Note: current implementation computes hash for symlinks (from the target file content)
|
||||
assert.NotEqual(t, uint64(0), fileInfo.hash)
|
||||
})
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ func NewNode(parent *FileNode, name string, data FileInfo) *FileNode {
|
|||
}
|
||||
|
||||
// Create object with struct literal to avoid extra allocations and assignments
|
||||
// OPTIMIZATION: Don't call data.Copy() - FileInfo is already copied by value
|
||||
return &FileNode{
|
||||
Tree: tree,
|
||||
Parent: parent,
|
||||
|
|
@ -49,7 +50,7 @@ func NewNode(parent *FileNode, name string, data FileInfo) *FileNode {
|
|||
Name: name,
|
||||
// Initialize Data directly, avoiding NewNodeData() call and extra struct copying
|
||||
Data: NodeData{
|
||||
FileInfo: *data.Copy(),
|
||||
FileInfo: data,
|
||||
// DiffType defaults to Unmodified (0), explicit initialization not needed
|
||||
},
|
||||
// Children: nil, // Explicitly leave nil for memory savings (lazy initialization)
|
||||
|
|
@ -114,7 +115,8 @@ func (node *FileNode) AddChild(name string, data FileInfo) *FileNode {
|
|||
// 2. Use "ok" idiom for existence check (faster and safer)
|
||||
if existingNode, ok := node.Children[name]; ok {
|
||||
// Node already exists, just update the data
|
||||
existingNode.Data.FileInfo = *data.Copy()
|
||||
// OPTIMIZATION: Don't copy, just assign the value
|
||||
existingNode.Data.FileInfo = data
|
||||
return existingNode // Return existing node to avoid duplicates
|
||||
}
|
||||
|
||||
|
|
@ -163,7 +165,7 @@ func (node *FileNode) MetadataString() string {
|
|||
}
|
||||
|
||||
dir := "-"
|
||||
if node.Data.FileInfo.IsDir {
|
||||
if node.Data.FileInfo.IsDir() {
|
||||
dir = "d"
|
||||
}
|
||||
|
||||
|
|
@ -323,7 +325,7 @@ func (node *FileNode) Path() string {
|
|||
// Build and cache final path string
|
||||
node.path = "/" + strings.Join(segments, "/")
|
||||
}
|
||||
return node.path
|
||||
return strings.ReplaceAll(node.path, "//", "/")
|
||||
}
|
||||
|
||||
// deriveDiffType determines a DiffType to the current FileNode. Note: the DiffType of a node is always the DiffType of
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package filetree
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
|
@ -388,7 +389,7 @@ func TestFileNode_String(t *testing.T) {
|
|||
Path: "/dir",
|
||||
TypeFlag: 1,
|
||||
})
|
||||
node.Data.FileInfo.IsDir = true
|
||||
node.Data.FileInfo.TypeFlag = tar.TypeDir
|
||||
|
||||
str := node.String()
|
||||
if str == "" {
|
||||
|
|
@ -424,7 +425,7 @@ func TestFileNode_MetadataString(t *testing.T) {
|
|||
checkError(t, err, "unable to setup test")
|
||||
|
||||
node, _ := tree.GetNode("/dir")
|
||||
node.Data.FileInfo.IsDir = true
|
||||
node.Data.FileInfo.TypeFlag = tar.TypeDir
|
||||
|
||||
metadata := node.MetadataString()
|
||||
if metadata == "" {
|
||||
|
|
@ -457,7 +458,7 @@ func TestFileNode_GetSize(t *testing.T) {
|
|||
checkError(t, err, "unable to setup test")
|
||||
|
||||
node, _ := tree.GetNode("/dir")
|
||||
node.Data.FileInfo.IsDir = true
|
||||
node.Data.FileInfo.TypeFlag = tar.TypeDir
|
||||
|
||||
tree.AddPath("/dir/file1.txt", FileInfo{Size: 100})
|
||||
tree.AddPath("/dir/file2.txt", FileInfo{Size: 200})
|
||||
|
|
@ -477,7 +478,7 @@ func TestFileNode_GetSize(t *testing.T) {
|
|||
checkError(t, err, "unable to setup test")
|
||||
|
||||
node, _ := tree.GetNode("/dir")
|
||||
node.Data.FileInfo.IsDir = true
|
||||
node.Data.FileInfo.TypeFlag = tar.TypeDir
|
||||
|
||||
size := node.GetSize()
|
||||
if size != 0 {
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ func (tree *FileTree) VisibleSize() int {
|
|||
return nil
|
||||
}
|
||||
visitEvaluator := func(node *FileNode) bool {
|
||||
if node.Data.FileInfo.IsDir {
|
||||
if node.Data.FileInfo.IsDir() {
|
||||
// we won't visit a collapsed dir, but we need to count it
|
||||
if node.Data.ViewInfo.Collapsed {
|
||||
size++
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package filetree
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
|
|
@ -864,7 +865,7 @@ func TestVisibleSize(t *testing.T) {
|
|||
Size: 100,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
_, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -886,7 +887,7 @@ func TestVisibleSize(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
node, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -913,7 +914,7 @@ func TestVisibleSize(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
node, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -940,7 +941,7 @@ func TestVisibleSize(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
node, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -974,7 +975,7 @@ func TestVisibleSize(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir1" || path == "/dir2" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
node, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -1008,7 +1009,7 @@ func TestVisitDepthParentFirst(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
_, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -1057,7 +1058,7 @@ func TestVisitDepthParentFirst(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
_, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
@ -1112,7 +1113,7 @@ func TestVisitDepthParentFirst(t *testing.T) {
|
|||
hash: 123,
|
||||
}
|
||||
if path == "/dir" {
|
||||
fakeData.IsDir = true
|
||||
fakeData.TypeFlag = tar.TypeDir
|
||||
}
|
||||
_, _, err := tree.AddPath(path, fakeData)
|
||||
assert.NoError(t, err)
|
||||
|
|
|
|||
|
|
@ -229,11 +229,24 @@ func getFileList(tarReader *tar.Reader) ([]filetree.FileInfo, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// always ensure relative path notations are not parsed as part of the filename
|
||||
name := path.Clean(header.Name)
|
||||
if name == "." {
|
||||
// OPTIMIZATION: Avoid path.Clean for most paths (saves ~18 MB)
|
||||
// Docker tar paths are presumably already clean; only clean when the path appears to contain relative notation
|
||||
name := header.Name
|
||||
if name == "." || name == "" {
|
||||
continue
|
||||
}
|
||||
// Fast path: skip Clean() for normal paths (99% of cases)
|
||||
// Only clean if the path starts or ends with '.', or (when longer than 2 bytes) contains ".." or "//"; interior "/./" and trailing "/" are NOT detected
|
||||
hasDot := len(name) > 0 && (name[0] == '.' || name[len(name)-1] == '.')
|
||||
hasSlash := len(name) > 2
|
||||
cleanNeeded := hasDot || (hasSlash && (strings.Contains(name, "..") || strings.Contains(name, "//")))
|
||||
|
||||
if cleanNeeded {
|
||||
name = path.Clean(name)
|
||||
if name == "." {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
switch header.Typeflag {
|
||||
case tar.TypeXGlobalHeader:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue