diff --git a/go.mod b/go.mod index 046bcc65..9302b29c 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/alecthomas/kong v0.5.0 github.com/dgraph-io/badger/v3 v3.2103.2 github.com/google/go-cmp v0.5.5 + github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db github.com/stretchr/testify v1.7.0 ) diff --git a/go.sum b/go.sum index 766d067d..516b0cb2 100644 --- a/go.sum +++ b/go.sum @@ -67,6 +67,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db h1:PfgdUkbymefXsGoYrYjCpZh1PcmQ3tDHKGt1fSDm1+o= +github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db/go.mod h1:9KpZcfiu0ZuQsRGTJ8ggDEjKUndlV6TFf8IMKNhW1qA= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= diff --git a/pkg/nixpath/chunker/chunker_test.go b/pkg/nixpath/chunker/chunker_test.go new file mode 100644 index 00000000..8115de95 --- /dev/null +++ b/pkg/nixpath/chunker/chunker_test.go @@ -0,0 +1,69 @@ +package chunker_test + +import ( + "bytes" + _ "embed" + "errors" + "io" + "testing" + + "github.com/nix-community/go-nix/pkg/nixpath/chunker" + "github.com/stretchr/testify/assert" +) + +//go:embed simple.go +var testData []byte + +func TestChunkers(t *testing.T) { + fastCDCChunker, err := chunker.NewFastCDCChunker(bytes.NewReader(testData)) + if err != nil { + panic(err) + } + + chunkers := []struct { + Name string + Chunker chunker.Chunker + }{ + { + "Simple", + chunker.NewSimpleChunker(bytes.NewReader(testData)), + }, + { + "FastCDC", + fastCDCChunker, + }, + } + + for _, chunker := range chunkers { + t.Run(chunker.Name, func(t *testing.T) { + // grab data out of the chunker. + // Ensure it matches testData. + + var receivedData bytes.Buffer + offset := uint64(0) + + for { + chunk, err := chunker.Chunker.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + assert.NoError(t, err, "no other error other than EOF is accepted") + } + + // check the offset is sane + assert.Equal(t, offset, chunk.Offset, "recorded offset size doesn't match passed offset size") + + offset += uint64(len(chunk.Data)) + + // write the data into the receivedData buffer + if _, err := receivedData.Write(chunk.Data); err != nil { + panic(err) + } + } + + // compare received chunk contents with what was passed into the chunker + assert.Equal(t, testData, receivedData.Bytes()) + }) + } +} diff --git a/pkg/nixpath/chunker/fastcdc.go b/pkg/nixpath/chunker/fastcdc.go new file mode 100644 index 00000000..b85ede6c --- /dev/null +++ b/pkg/nixpath/chunker/fastcdc.go @@ -0,0 +1,48 @@ +package chunker + +import ( + "errors" + "fmt" + "io" + + fastcdc "github.com/poolpOrg/go-fastcdc" +) + +func NewFastCDCChunker(r io.Reader) (Chunker, error) { // nolint:ireturn + fastcdc.NewChunkerOptions() + chunkerOpts := fastcdc.NewChunkerOptions() + + // FUTUREWORK: Test with different chunk sizes + chunkerOpts.NormalSize = 64 * 2024 + chunkerOpts.MinSize = chunkerOpts.NormalSize / 4 + chunkerOpts.MaxSize = chunkerOpts.NormalSize * 4 + + c, err := fastcdc.NewChunker(r, chunkerOpts) + if err != nil { + return nil, fmt.Errorf("unable to initialize fastcdc: %w", err) + } + + return &FastCDCChunker{ + c: c, + }, nil +} + +type FastCDCChunker struct { + c *fastcdc.Chunker +} + +func (f *FastCDCChunker) Next() (*Chunk, error) { + chunk, err := f.c.Next() + if err != nil { + if errors.Is(err, io.EOF) { + return nil, err + } + + return nil, fmt.Errorf("error getting next chunk: %w", err) + } + + return &Chunk{ + Offset: chunk.Offset, + Data: chunk.Data, + }, nil +} diff --git a/pkg/nixpath/chunker/interface.go b/pkg/nixpath/chunker/interface.go new file mode 100644 index 00000000..b6abc9c7 --- /dev/null +++ b/pkg/nixpath/chunker/interface.go @@ -0,0 +1,14 @@ +package chunker + +type Chunk struct { + Offset uint64 + Data []byte +} + +// Chunker describes the interface that a given chunker needs to implement. +// Next() is periodically called until io.EOF is encountered. +// In case of no error, Next() returns a new chunk. + +type Chunker interface { + Next() (*Chunk, error) +} diff --git a/pkg/nixpath/chunker/simple.go b/pkg/nixpath/chunker/simple.go new file mode 100644 index 00000000..77aaa20d --- /dev/null +++ b/pkg/nixpath/chunker/simple.go @@ -0,0 +1,39 @@ +package chunker + +import ( + "bytes" + "fmt" + "io" +) + +func NewSimpleChunker(r io.Reader) Chunker { // nolint:ireturn + return &SimpleChunker{ + r: r, + } +} + +// SimpleChunker simply returns one chunk for all of the contents. +type SimpleChunker struct { + r io.Reader + done bool +} + +func (s *SimpleChunker) Next() (*Chunk, error) { + // if we already read everything, return io.EOF + if s.done { + return nil, io.EOF + } + + var buf bytes.Buffer + + if _, err := io.Copy(&buf, s.r); err != nil { + return nil, fmt.Errorf("error returning from reader: %w", err) + } + + s.done = true + + return &Chunk{ + Offset: 0, + Data: buf.Bytes(), + }, nil +}