Skip to content

Commit 0eded58

Browse files
committed
pkg/nixpath/chunker: add
This provides two different implementations to chunk data.
1 parent 60a2ce4 commit 0eded58

File tree

6 files changed

+182
-0
lines changed

6 files changed

+182
-0
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/alecthomas/kong v0.5.0
77
github.com/dgraph-io/badger/v3 v3.2103.2
88
github.com/google/go-cmp v0.5.5
9+
github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db
910
github.com/stretchr/testify v1.7.0
1011
)
1112

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
6767
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
6868
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
6969
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
70+
github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db h1:PfgdUkbymefXsGoYrYjCpZh1PcmQ3tDHKGt1fSDm1+o=
71+
github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db/go.mod h1:9KpZcfiu0ZuQsRGTJ8ggDEjKUndlV6TFf8IMKNhW1qA=
7072
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
7173
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
7274
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package chunker_test
2+
3+
import (
4+
"bytes"
5+
_ "embed"
6+
"errors"
7+
"io"
8+
"testing"
9+
10+
"github.com/nix-community/go-nix/pkg/nixpath/chunker"
11+
"github.com/stretchr/testify/assert"
12+
)
13+
14+
//go:embed simple.go
15+
var testData []byte
16+
17+
func TestChunkers(t *testing.T) {
18+
fastCDCChunker, err := chunker.NewFastCDCChunker(bytes.NewReader(testData))
19+
if err != nil {
20+
panic(err)
21+
}
22+
23+
chunkers := []struct {
24+
Name string
25+
Chunker chunker.Chunker
26+
}{
27+
{
28+
"Simple",
29+
chunker.NewSimpleChunker(bytes.NewReader(testData)),
30+
},
31+
{
32+
"FastCDC",
33+
fastCDCChunker,
34+
},
35+
}
36+
37+
for _, chunker := range chunkers {
38+
t.Run(chunker.Name, func(t *testing.T) {
39+
// grab data out of the chunker.
40+
// Ensure it matches testData.
41+
42+
var receivedData bytes.Buffer
43+
offset := uint64(0)
44+
45+
for {
46+
chunk, err := chunker.Chunker.Next()
47+
if err != nil {
48+
if errors.Is(err, io.EOF) {
49+
break
50+
}
51+
assert.NoError(t, err, "no other error other than EOF is accepted")
52+
}
53+
// check the chunk itself looks sane
54+
assert.True(t,
55+
uint64(len(chunk.Data)) == chunk.Size,
56+
"the length of the chunk data needs to match what's written in Size",
57+
)
58+
59+
// check the offset is sane
60+
assert.Equal(t, offset, chunk.Offset, "recorded offset size doesn't match passed offset size")
61+
62+
offset += chunk.Size
63+
64+
// write the data into the receivedData buffer
65+
if _, err := receivedData.Write(chunk.Data); err != nil {
66+
panic(err)
67+
}
68+
}
69+
70+
// compare received chunk contents with what was passed into the chunker
71+
assert.Equal(t, testData, receivedData.Bytes())
72+
})
73+
}
74+
}

pkg/nixpath/chunker/fastcdc.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package chunker
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"io"
7+
8+
fastcdc "github.com/poolpOrg/go-fastcdc"
9+
)
10+
11+
func NewFastCDCChunker(r io.Reader) (Chunker, error) { // nolint:ireturn
12+
fastcdc.NewChunkerOptions()
13+
chunkerOpts := fastcdc.NewChunkerOptions()
14+
15+
// FUTUREWORK: Test with different chunk sizes
16+
chunkerOpts.NormalSize = 64 * 2024
17+
chunkerOpts.MinSize = chunkerOpts.NormalSize / 4
18+
chunkerOpts.MaxSize = chunkerOpts.NormalSize * 4
19+
20+
c, err := fastcdc.NewChunker(r, chunkerOpts)
21+
if err != nil {
22+
return nil, fmt.Errorf("unable to initialize fastcdc: %w", err)
23+
}
24+
25+
return &FastCDCChunker{
26+
c: c,
27+
}, nil
28+
}
29+
30+
type FastCDCChunker struct {
31+
c *fastcdc.Chunker
32+
}
33+
34+
func (f *FastCDCChunker) Next() (*Chunk, error) {
35+
chunk, err := f.c.Next()
36+
if err != nil {
37+
if errors.Is(err, io.EOF) {
38+
return nil, err
39+
}
40+
41+
return nil, fmt.Errorf("error getting next chunk: %w", err)
42+
}
43+
44+
return &Chunk{
45+
Offset: chunk.Offset,
46+
Size: uint64(chunk.Size),
47+
Data: chunk.Data,
48+
}, nil
49+
}

pkg/nixpath/chunker/interface.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package chunker
2+
3+
type Chunk struct {
4+
Offset uint64
5+
Size uint64
6+
Data []byte
7+
}
8+
9+
// Chunker describes the interface that a given chunker needs to implement.
10+
// Next() is periodically called until io.EOF is encountered.
11+
// In case of no error, Next() returns a new chunk.
12+
13+
type Chunker interface {
14+
Next() (*Chunk, error)
15+
}

pkg/nixpath/chunker/simple.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package chunker
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"io"
7+
)
8+
9+
func NewSimpleChunker(r io.Reader) Chunker { // nolint:ireturn
10+
return &SimpleChunker{
11+
r: r,
12+
}
13+
}
14+
15+
// SimpleChunker simply returns one chunk for all of the contents.
16+
type SimpleChunker struct {
17+
r io.Reader
18+
done bool
19+
}
20+
21+
func (s *SimpleChunker) Next() (*Chunk, error) {
22+
// if we already read everything, return io.EOF
23+
if s.done {
24+
return nil, io.EOF
25+
}
26+
27+
var buf bytes.Buffer
28+
29+
n, err := io.Copy(&buf, s.r)
30+
if err != nil {
31+
return nil, fmt.Errorf("error returning from reader: %w", err)
32+
}
33+
34+
s.done = true
35+
36+
return &Chunk{
37+
Offset: 0,
38+
Size: uint64(n),
39+
Data: buf.Bytes(),
40+
}, nil
41+
}

0 commit comments

Comments
 (0)