diff --git a/go.mod b/go.mod index 046bcc65..db6bf909 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/alecthomas/kong v0.5.0 github.com/dgraph-io/badger/v3 v3.2103.2 github.com/google/go-cmp v0.5.5 + github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db github.com/stretchr/testify v1.7.0 ) @@ -22,10 +23,18 @@ require ( github.com/golang/snappy v0.0.3 // indirect github.com/google/flatbuffers v1.12.1 // indirect github.com/klauspost/compress v1.12.3 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/minio/sha256-simd v1.0.0 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect + github.com/multiformats/go-multihash v0.2.0 // indirect + github.com/multiformats/go-varint v0.0.6 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect go.opencensus.io v0.22.5 // indirect - golang.org/x/net v0.0.0-20201021035429-f5854403a974 // indirect - golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c // indirect + golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect + golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 // indirect + golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect + lukechampine.com/blake3 v1.1.6 // indirect ) diff --git a/go.sum b/go.sum index 766d067d..12177edc 100644 --- a/go.sum +++ b/go.sum @@ -52,14 +52,25 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.12.3 h1:G5AfA94pHPysR56qqrkO2pxEexdDzrpFJ6yt/VqWxVU= github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.9 
h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/multiformats/go-multihash v0.2.0 h1:oytJb9ZA1OUW0r0f9ea18GiaPOo4SXyc7p2movyUuo4= +github.com/multiformats/go-multihash v0.2.0/go.mod h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= +github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY= +github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -67,6 +78,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db h1:PfgdUkbymefXsGoYrYjCpZh1PcmQ3tDHKGt1fSDm1+o= +github.com/poolpOrg/go-fastcdc v0.0.0-20211130135149-aa8a1e8a10db/go.mod h1:9KpZcfiu0ZuQsRGTJ8ggDEjKUndlV6TFf8IMKNhW1qA= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= @@ -93,6 +106,8 @@ golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM= +golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= @@ -108,6 +123,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= 
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -123,6 +140,8 @@ golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 h1:SrN+KX8Art/Sf4HNj6Zcz06G7VEz+7w9tdXTPOZ7+l4= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -152,3 +171,5 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +lukechampine.com/blake3 v1.1.6 
h1:H3cROdztr7RCfoaTpGZFQsrqvweFLrqS73j7L7cmR5c= +lukechampine.com/blake3 v1.1.6/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA= diff --git a/pkg/hash/writer.go b/pkg/hash/writer.go new file mode 100644 index 00000000..3a4d1344 --- /dev/null +++ b/pkg/hash/writer.go @@ -0,0 +1,57 @@ +package hash + +import ( + "fmt" + "hash" + "io" +) + +var _ io.Writer = &Writer{} + +// Writer is a io.Writer. Bytes can be written to it. +// After each write, it'll allow querying for the total number of bytes written, +// as well as the digest of the bytes written. +// TODO: tests! +type Writer struct { + h hash.Hash + bytesWritten uint64 +} + +// Write writes the given bytes to the internal hasher +// and increments the number of bytes written. +func (hw *Writer) Write(p []byte) (int, error) { + n, err := hw.h.Write(p) + if err != nil { + return 0, fmt.Errorf("unable to write to hash function: %w", err) + } + + hw.bytesWritten += uint64(n) + + return n, nil +} + +// Digest returns the digest of the internal hash function. +func (hw *Writer) Digest() []byte { + return hw.h.Sum(nil) +} + +// BytesWritten returns the number of bytes written. +func (hw *Writer) BytesWritten() uint64 { + return hw.bytesWritten +} + +// Reset wipes all internal state. +func (hw *Writer) Reset() { + hw.h.Reset() + hw.bytesWritten = 0 +} + +// NewWriter returns a new hash.Writer for a given HashType. 
+func NewWriter(hashType HashType) (*Writer, error) { + hashFunc := hashFunc(hashType) + + return &Writer{ + h: hashFunc.New(), + bytesWritten: 0, + }, nil +} diff --git a/pkg/nixpath/chunker/chunker_test.go b/pkg/nixpath/chunker/chunker_test.go new file mode 100644 index 00000000..6114ff98 --- /dev/null +++ b/pkg/nixpath/chunker/chunker_test.go @@ -0,0 +1,83 @@ +package chunker_test + +import ( + "bytes" + _ "embed" + "errors" + "io" + "testing" + + "github.com/nix-community/go-nix/pkg/nixpath/chunker" + "github.com/stretchr/testify/assert" +) + +//go:embed simple.go +var testData []byte + +// nolint:gochecknoglobals +var chunkers = []struct { + Name string + New func([]byte) chunker.Chunker +}{ + { + "Simple", + func(data []byte) chunker.Chunker { + return chunker.NewSimpleChunker(bytes.NewReader(data)) + }, + }, + { + "FastCDC", + func(data []byte) chunker.Chunker { + c, err := chunker.NewFastCDCChunker(bytes.NewReader(data)) + if err != nil { + panic(err) + } + + return c + }, + }, +} + +func TestEmptySlice(t *testing.T) { + for _, chunker := range chunkers { + t.Run(chunker.Name, func(t *testing.T) { + // create a new chunker with the testData + c := chunker.New([]byte{}) + + _, err := c.Next() + if assert.Error(t, err, "c.Next should return an error") { + assert.ErrorIs(t, err, io.EOF, "it should be EOF") + } + }) + } +} + +func TestSimple(t *testing.T) { + for _, chunker := range chunkers { + // grab data out of the chunker. + // Ensure it matches testData. 
+ t.Run(chunker.Name, func(t *testing.T) { + // create a new chunker with the testData + c := chunker.New(testData) + + var receivedData bytes.Buffer + + for { + chunk, err := c.Next() + if err != nil { + if errors.Is(err, io.EOF) { + break + } + assert.NoError(t, err, "no other error other than EOF is accepted") + } + // write the data into the receivedData buffer + if _, err := receivedData.Write(chunk); err != nil { + panic(err) + } + } + + // compare received chunk contents with what was passed into the chunker + assert.Equal(t, testData, receivedData.Bytes()) + }) + } +} diff --git a/pkg/nixpath/chunker/fastcdc.go b/pkg/nixpath/chunker/fastcdc.go new file mode 100644 index 00000000..efed162f --- /dev/null +++ b/pkg/nixpath/chunker/fastcdc.go @@ -0,0 +1,45 @@ +package chunker + +import ( + "errors" + "fmt" + "io" + + fastcdc "github.com/poolpOrg/go-fastcdc" +) + +func NewFastCDCChunker(r io.Reader) (Chunker, error) { // nolint:ireturn + // start from the library's default chunker options + chunkerOpts := fastcdc.NewChunkerOptions() + + // FUTUREWORK: Test with different chunk sizes + chunkerOpts.NormalSize = 64 * 1024 + chunkerOpts.MinSize = chunkerOpts.NormalSize / 4 + chunkerOpts.MaxSize = chunkerOpts.NormalSize * 4 + + c, err := fastcdc.NewChunker(r, chunkerOpts) + if err != nil { + return nil, fmt.Errorf("unable to initialize fastcdc: %w", err) + } + + return &FastCDCChunker{ + c: c, + }, nil +} + +type FastCDCChunker struct { + c *fastcdc.Chunker +} + +func (f *FastCDCChunker) Next() (Chunk, error) { + chunk, err := f.c.Next() + if err != nil { + if errors.Is(err, io.EOF) { + return nil, err + } + + return nil, fmt.Errorf("error getting next chunk: %w", err) + } + + return (Chunk)(chunk.Data), nil +} diff --git a/pkg/nixpath/chunker/interface.go b/pkg/nixpath/chunker/interface.go new file mode 100644 index 00000000..2aae6be1 --- /dev/null +++ b/pkg/nixpath/chunker/interface.go @@ -0,0 +1,14 @@ +package chunker + +type Chunk []byte + +// Chunker describes the interface that a
given chunker needs to implement. +// Next() is periodically called until io.EOF is encountered. +// In case of no error, Next() returns a new chunk. + +// TODO: is this interface the right one, or should we add initialization +// to the interface? Look at how it's used in pkg/store/import.go + +type Chunker interface { + Next() (Chunk, error) +} diff --git a/pkg/nixpath/chunker/simple.go b/pkg/nixpath/chunker/simple.go new file mode 100644 index 00000000..3346422a --- /dev/null +++ b/pkg/nixpath/chunker/simple.go @@ -0,0 +1,42 @@ +package chunker + +import ( + "bytes" + "fmt" + "io" +) + +func NewSimpleChunker(r io.Reader) Chunker { // nolint:ireturn + return &SimpleChunker{ + r: r, + } +} + +// SimpleChunker simply returns one chunk for all of the contents. +type SimpleChunker struct { + r io.Reader + done bool +} + +func (s *SimpleChunker) Next() (Chunk, error) { + // if we already read everything, return io.EOF + if s.done { + return nil, io.EOF + } + + var buf bytes.Buffer + + w, err := io.Copy(&buf, s.r) + if err != nil { + return nil, fmt.Errorf("error returning from reader: %w", err) + } + + s.done = true + + // if we got passed an empty slice, return io.EOF + if w == 0 { + return nil, io.EOF + } + + return buf.Bytes(), nil +} diff --git a/pkg/store/chunks_reader.go b/pkg/store/chunks_reader.go new file mode 100644 index 00000000..1686c560 --- /dev/null +++ b/pkg/store/chunks_reader.go @@ -0,0 +1,58 @@ +package store + +import ( + "bytes" + "context" + "fmt" + "io" +) + +// ChunksReader allows reading over a list of multiple chunks. +// It retrieves these from a chunk store. +type ChunksReader struct { + ctx context.Context + chunkStore ChunkStore + chunks []*ChunkMeta + chunkIdx int + buf bytes.Buffer + err error +} + +func NewChunksReader(ctx context.Context, chunks []*ChunkMeta, chunkStore ChunkStore) *ChunksReader { + return &ChunksReader{ + chunkStore: chunkStore, + chunks: chunks, + ctx: ctx, + } +} + +// Read will return more data.
As the chunk sizes usually differ from the size of p this is called with, +// we buffer the currently requested chunk in a buffer and drain it, requesting a new one when it's empty. +func (cr *ChunksReader) Read(p []byte) (n int, err error) { + if cr.err != nil { + return 0, cr.err + } + // if the buffer is empty, we need to request a new chunk. + if cr.buf.Len() == 0 { + // check if chunkIdx would point outside the list of chunks + if cr.chunkIdx >= len(cr.chunks) { + cr.err = io.EOF + + return 0, cr.err + } + + b, err := cr.chunkStore.Get(cr.ctx, cr.chunks[cr.chunkIdx].Identifier) + if err != nil { + cr.err = err + + return 0, fmt.Errorf("unable to retrieve chunk from the chunk store: %w", err) + } + + _, _ = cr.buf.Write(b) + + // Increment chunkIdx, which might overshoot. It's fine, as we check before fetching a new chunk + cr.chunkIdx++ + } + + return cr.buf.Read(p) +} diff --git a/pkg/store/chunkstore/badger.go b/pkg/store/chunkstore/badger.go new file mode 100644 index 00000000..48ddd454 --- /dev/null +++ b/pkg/store/chunkstore/badger.go @@ -0,0 +1,157 @@ +package chunkstore + +import ( + "bytes" + "context" + "fmt" + "sync" + + "github.com/dgraph-io/badger/v3" + "github.com/nix-community/go-nix/pkg/store" +) + +var _ store.ChunkStore = &BadgerStore{} + +func buildDefaultBadgerOptions(path string) badger.Options { + // set log level for badger to WARN, as it spams with INFO: + // https://github.com/dgraph-io/badger/issues/556#issuecomment-536145162 + return badger.DefaultOptions(path).WithLoggingLevel(badger.WARNING) +} + +// NewBadgerStore opens a store that stores its data +// in the path specified by path (or in memory, if inMemory is set to true) +// hashName needs to be one of the hash algorithms supported by go-multihash, +// and will be used to identify new hashes being uploaded. 
+func NewBadgerStore(hashName string, path string, inMemory bool) (*BadgerStore, error) { + badgerOpts := buildDefaultBadgerOptions(path) + if inMemory { + badgerOpts = badgerOpts.WithInMemory(true) + } + + db, err := badger.Open(badgerOpts) + if err != nil { + return nil, fmt.Errorf("error opening badger store: %w", err) + } + + hasherPool, err := store.NewHasherPool(hashName) + if err != nil { + return nil, fmt.Errorf("unable to create new hasher pool for %v: %w", hashName, err) + } + + return &BadgerStore{ + db: db, + hasherPool: hasherPool, + }, nil +} + +// NewBadgerMemoryStore opens a store that entirely resides in memory. +func NewBadgerMemoryStore(hashName string) (*BadgerStore, error) { + return NewBadgerStore(hashName, "", true) +} + +// BadgerStore stores chunks using badger. +type BadgerStore struct { + db *badger.DB + hasherPool *sync.Pool +} + +// Get retrieves a chunk by its identifier. +// The chunks are not checked to match the checksum, +// as the local badger store is considered trusted. +// FUTUREWORK: make configurable? +func (bs *BadgerStore) Get( + ctx context.Context, + id store.ChunkIdentifier, +) ([]byte, error) { + var data []byte + + err := bs.db.View(func(txn *badger.Txn) error { + item, err := txn.Get(id) + if err != nil { + return err + } + + return item.Value(func(val []byte) error { + data = append([]byte{}, val...) + + return nil + }) + }) + if err != nil { + if err == badger.ErrKeyNotFound { + return nil, fmt.Errorf("chunk not found") + } + + return nil, fmt.Errorf("error reading from badger: %w", err) + } + + return data, nil +} + +// Has checks if a certain chunk exists in a local chunk store. 
+func (bs *BadgerStore) Has( + ctx context.Context, + id store.ChunkIdentifier, +) (bool, error) { + found := false + + err := bs.db.View(func(txn *badger.Txn) error { + opts := badger.DefaultIteratorOptions + opts.PrefetchValues = false + it := txn.NewIterator(opts) + defer it.Close() + + for it.Seek(id); it.Valid(); it.Next() { + item := it.Item() + k := item.Key() + if bytes.Equal(k, id) { + found = true + + break + } + } + + return nil + }) + if err != nil { + return false, fmt.Errorf("unable to check for existence in badger: %w", err) + } + + return found, nil +} + +// Put inserts a chunk into the store. +// The identifier/hash is returned. +func (bs *BadgerStore) Put( + ctx context.Context, + data []byte, +) (store.ChunkIdentifier, error) { + hasher := bs.hasherPool.Get().(*store.Hasher) + + _, err := hasher.Write(data) + if err != nil { + return nil, fmt.Errorf("error hashing data: %w", err) + } + + id, err := hasher.Sum() + if err != nil { + return nil, fmt.Errorf("error calculating multihash: %w", err) + } + + err = bs.db.Update(func(txn *badger.Txn) error { + err := txn.Set(id, data) + + return err + }) + + if err != nil { + return nil, fmt.Errorf("error writing to badger: %w", err) + } + + return id, nil +} + +// Close closes the store. 
+func (bs *BadgerStore) Close() error { + return bs.db.Close() +} diff --git a/pkg/store/chunkstore/filesystem.go b/pkg/store/chunkstore/filesystem.go new file mode 100644 index 00000000..d7e9a232 --- /dev/null +++ b/pkg/store/chunkstore/filesystem.go @@ -0,0 +1,149 @@ +package chunkstore + +import ( + "context" + "encoding/hex" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sync" + + "github.com/nix-community/go-nix/pkg/store" +) + +var _ store.ChunkStore = &FilesystemStore{} + +func NewFilesystemStore(hashName string, baseDirectory string) (*FilesystemStore, error) { + err := os.MkdirAll(baseDirectory, os.ModePerm) + if err != nil { + return nil, fmt.Errorf("error mkdir'ing base directory: %w", err) + } + + hasherPool, err := store.NewHasherPool(hashName) + if err != nil { + return nil, fmt.Errorf("unable to create new hasher pool for %v: %w", hashName, err) + } + + return &FilesystemStore{ + baseDirectory: baseDirectory, + hasherPool: hasherPool, + }, nil +} + +// TODO: generalize on io/fs? or rclone? + +type FilesystemStore struct { + baseDirectory string + hasherPool *sync.Pool + // TODO: allow compression. Probably default to zstd. +} + +// chunkPath calculates the path on the filesystem to the chunk +// identified by id. +func (fs *FilesystemStore) chunkPath(id store.ChunkIdentifier) string { + encodedID := hex.EncodeToString(id) + + return filepath.Join(fs.baseDirectory, encodedID[:4], encodedID+".chunk") +} + +func (fs *FilesystemStore) Get( + ctx context.Context, + id store.ChunkIdentifier, +) ([]byte, error) { + p := fs.chunkPath(id) + + f, err := os.Open(p) + if err != nil { + return nil, err + } + + defer f.Close() + + contents, err := io.ReadAll(f) + if err != nil { + return nil, fmt.Errorf("error reading file contents from %v: %w", p, err) + } + // TODO: configurable content validation? 
+ + return contents, nil +} + +func (fs *FilesystemStore) Has( + ctx context.Context, + id store.ChunkIdentifier, +) (bool, error) { + p := fs.chunkPath(id) + + _, err := os.Stat(p) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + + return false, fmt.Errorf("error stat()ing %v: %w", p, err) + } + + return true, nil +} + +func (fs *FilesystemStore) Put( + ctx context.Context, + data []byte, +) (store.ChunkIdentifier, error) { + // get a hasher + hasher := fs.hasherPool.Get().(*store.Hasher) + + // create a tempfile (in the same directory). + // We write to it, then move it to where we want it to be + // this is to ensure an atomic write/replacement. + tmpFile, err := ioutil.TempFile(fs.baseDirectory, "") + if err != nil { + return nil, fmt.Errorf("error creating temporary file: %w", err) + } + + defer tmpFile.Close() + defer os.Remove(tmpFile.Name()) + + w := io.MultiWriter(hasher, tmpFile) + + _, err = w.Write(data) + if err != nil { + return nil, fmt.Errorf("error writing data: %w", err) + } + + id, err := hasher.Sum() + if err != nil { + return nil, fmt.Errorf("error calculating multihash: %w", err) + } + + // close tmpFile for writing, everything written + err = tmpFile.Close() + if err != nil { + return nil, fmt.Errorf("error closing temporary file: %w", err) + } + + // calculate the final path to store the chunk at + p := fs.chunkPath(id) + + // create parent directories if needed + err = os.MkdirAll(filepath.Dir(p), os.ModePerm) + if err != nil { + return nil, fmt.Errorf("unable to mkdir'ig parent directory for %v: %w", p, err) + } + + // move chunk at the location + err = os.Rename(tmpFile.Name(), p) + if err != nil { + return nil, fmt.Errorf("error moving temporary file to it's final location (%v): %w", p, err) + } + + return id, nil +} + +// Close closes the store.
+func (fs *FilesystemStore) Close() error { + return nil +} diff --git a/pkg/store/export.go b/pkg/store/export.go new file mode 100644 index 00000000..e3ba84d7 --- /dev/null +++ b/pkg/store/export.go @@ -0,0 +1,89 @@ +package store + +import ( + "context" + "fmt" + "io" + "sort" + + "github.com/nix-community/go-nix/pkg/nar" +) + +// Export consumes: +// - context +// - a PathInfo struct +// - a writer to write the NAR file contents to +// - a pointer to a chunk store +// It will write the synthesized NAR file contents to the +// passed writer, then return the storePath and references, or an error. +func Export( + ctx context.Context, + pathInfo *PathInfo, + w io.Writer, + chunkStore ChunkStore, +) (string, []string, error) { + // set up the NAR writer + nw, err := nar.NewWriter(w) + if err != nil { + return "", nil, fmt.Errorf("error setting up nar writer: %w", err) + } + + // assemble a list of Entries + entries := make([]entryWithPath, 0, len(pathInfo.Directories)+len(pathInfo.Regulars)+len(pathInfo.Symlinks)) + for _, directoryEntry := range pathInfo.Directories { + entries = append(entries, directoryEntry) + } + + for _, regularEntry := range pathInfo.Regulars { + entries = append(entries, regularEntry) + } + + for _, symlinkEntry := range pathInfo.Symlinks { + entries = append(entries, symlinkEntry) + } + + // sort the slice based on their Path. + sort.Slice(entries, func(i, j int) bool { + return nar.PathIsLexicographicallyOrdered(entries[i].GetPath(), entries[j].GetPath()) + }) + + // loop over the elements, use reflection to figure out the type and feed the nar writer. 
+ for _, entry := range entries { + switch v := entry.(type) { + case *DirectoryEntry: + if err := nw.WriteHeader(&nar.Header{ + Path: v.GetPath(), + Type: nar.TypeDirectory, + }); err != nil { + return "", nil, fmt.Errorf("error writing directory header: %w", err) + } + case *RegularEntry: + if err := nw.WriteHeader(&nar.Header{ + Path: v.GetPath(), + Type: nar.TypeRegular, + Executable: v.Executable, + }); err != nil { + return "", nil, fmt.Errorf("error writing regular header: %w", err) + } + // use a ChunksReader to read through all the chunks and write them to the nar writer + r := NewChunksReader(ctx, v.Chunks, chunkStore) + if _, err := io.Copy(nw, r); err != nil { + return "", nil, fmt.Errorf("unable to write file content to nar writer: %w", err) + } + case *SymlinkEntry: + if err := nw.WriteHeader(&nar.Header{ + Path: v.GetPath(), + Type: nar.TypeSymlink, + LinkTarget: v.Target, + }); err != nil { + return "", nil, fmt.Errorf("error writing symlink header: %w", err) + } + } + } + + if err := nw.Close(); err != nil { + return "", nil, fmt.Errorf("error closing nar writer: %w", err) + } + + return pathInfo.OutputName, pathInfo.References, nil +} diff --git a/pkg/store/hasher.go b/pkg/store/hasher.go new file mode 100644 index 00000000..768c4fb7 --- /dev/null +++ b/pkg/store/hasher.go @@ -0,0 +1,74 @@ +package store + +import ( + "crypto/sha256" + "fmt" + "hash" + "io" + "sync" + + "github.com/multiformats/go-multihash" +) + +// Hasher implements io.Writer. 
+var _ io.Writer = &Hasher{} + +type Hasher struct { + hash hash.Hash + hashName string +} + +func NewHasher(hashName string) (*Hasher, error) { + var hash hash.Hash + + switch hashName { + case "sha2-256": + hash = sha256.New() + default: + return nil, fmt.Errorf("unknown hash: %v", hashName) + } + + return &Hasher{ + hashName: hashName, + hash: hash, + }, nil +} + +func (h *Hasher) Write(p []byte) (n int, err error) { + return h.hash.Write(p) +} + +func (h *Hasher) Reset() { + h.hash.Reset() +} + +// Sum returns the digest, in multihash format. +func (h *Hasher) Sum() ([]byte, error) { + return multihash.EncodeName(h.hash.Sum(nil), h.hashName) +} + +// NewHasherPool returns a sync.Pool of a Hasher with the given hashName +// It creates one hasher to check the hash name is supported +// (which is then put in the pool), to avoid panic()'ing later. +func NewHasherPool(hashName string) (*sync.Pool, error) { + // try to set up a hasher once, to avoid panic'ing later. + firstHasher, err := NewHasher(hashName) + if err != nil { + return nil, fmt.Errorf("error setting up hasher: %w", err) + } + + syncPool := &sync.Pool{ + New: func() interface{} { + hasher, err := NewHasher(hashName) + if err != nil { + panic(fmt.Errorf("error setting up hasher: %w", err)) + } + + return hasher + }, + } + + syncPool.Put(firstHasher) + + return syncPool, nil +} diff --git a/pkg/store/http.go b/pkg/store/http.go new file mode 100644 index 00000000..373c81f7 --- /dev/null +++ b/pkg/store/http.go @@ -0,0 +1,355 @@ +package store + +import ( + "bytes" + "context" + "fmt" + "io" + "net/http" + "net/url" + "path" + "sync" + + "github.com/nix-community/go-nix/pkg/hash" + "github.com/nix-community/go-nix/pkg/nar/narinfo" + "github.com/nix-community/go-nix/pkg/nixbase32" + "github.com/nix-community/go-nix/pkg/nixpath" +) + +var _ Store = &HTTPStore{} + +// HTTPStore exposes a HTTP binary cache using .narinfo and .nar files +// It uses/populates another Store and ChunkStore as a cache, +// The 
ChunkStore also needs to be populated with the chunks +// referred in the PathInfo of a Put() call. +// TODO: Allow configuring to validate signatures. +type HTTPStore struct { + cacheStore Store + cacheChunkStore ChunkStore + Client *http.Client + BaseURL *url.URL +} + +// getNarinfoURL returns the full URL to a .narinfo, +// with respect to the configured baseURL. +// It constructs the URL by extracting the hash from the outputPath URL. +func (hs *HTTPStore) getNarinfoURL(outputPath *nixpath.NixPath) url.URL { + // copy the base URL + url := *hs.BaseURL + + url.Path = path.Join(url.Path, nixbase32.EncodeToString(outputPath.Digest)+".narinfo") + + return url +} + +// getNarURL returns the full URL to a .nar file, +// with respect to the configured baseURL +// It constructs the full URL by appending the passed URL to baseURL +// The passed URL usually comes from the `URL` field in the .narinfo file. +func (hs *HTTPStore) getNarURL(narPath string) url.URL { + // copy the base URL + url := *hs.BaseURL + + url.Path = path.Join(url.Path, narPath) + + return url +} + +func (hs *HTTPStore) Get(ctx context.Context, outputPath string) (*PathInfo, error) { + np, err := nixpath.FromString(outputPath) + if err != nil { + return nil, fmt.Errorf("error parsing output path: %w", err) + } + + // check the underlying cache store first + has, err := hs.cacheStore.Has(ctx, outputPath) + if err != nil { + return nil, fmt.Errorf("error asking underlying cache store: %w", err) + } + // if it's in there, we can just pass it along + if has { + return hs.cacheStore.Get(ctx, outputPath) + } + + // else, cause substitution. 
+ niURL := hs.getNarinfoURL(np) + + // construct the request + niReq, err := http.NewRequestWithContext(ctx, "GET", niURL.String(), nil) + if err != nil { + return nil, fmt.Errorf("error constructing request: %w", err) + } + + // do the request for the .narinfo file + niResp, err := hs.Client.Do(niReq) + if err != nil { + return nil, fmt.Errorf("error doing narinfo request: %w", err) + } + defer niResp.Body.Close() + + if niResp.StatusCode < 200 || niResp.StatusCode >= 300 { + return nil, fmt.Errorf("bad status code retrieving narinfo: %v", niResp.StatusCode) + } + + ni, err := narinfo.Parse(niResp.Body) + if err != nil { + return nil, fmt.Errorf("error parsing narinfo: %w", err) + } + + // ensure the .narinfo mentions the same store path that we expected + if ni.StorePath != outputPath { + return nil, fmt.Errorf("narinfo shows wrong storepath, got %v, expected %v", ni.StorePath, outputPath) + } + + // some more basic consistency checks of the .narinfo + if err := ni.Check(); err != nil { + return nil, fmt.Errorf(".narinfo fails consistency check: %w", err) + } + + // TODO: signature checks + + // construct the URL for the .nar file + narURL := hs.getNarURL(ni.URL) + + // construct the request for the .nar file + narReq, err := http.NewRequestWithContext(ctx, "GET", narURL.String(), nil) + if err != nil { + return nil, fmt.Errorf("error constructing nar request: %w", err) + } + + // do the request for the .nar file + narResp, err := hs.Client.Do(narReq) + if err != nil { + return nil, fmt.Errorf("error doing nar request: %w", err) + } + defer narResp.Body.Close() + + if narResp.StatusCode < 200 || narResp.StatusCode >= 300 { + return nil, fmt.Errorf("bad status code retrieving nar: %v", narResp.StatusCode) + } + + // TODO: handle compression. + // In case of compression AND provided FilePath/FileSize, also check these?
+ + hashWriter, err := hash.NewWriter(ni.NarHash.HashType) + if err != nil { + return nil, fmt.Errorf("error constructing hash.Writer: %w", err) + } + + // setup a io.TeeReader to ensure the nar file contents get written to hashWriter + // while the NarReader reads through the body. + tr := io.TeeReader(narResp.Body, hashWriter) + + // receive the .nar file + pathInfo, err := Import(ctx, ni.StorePath, ni.References, tr, hs.cacheChunkStore) + if err != nil { + return nil, fmt.Errorf("error converting narinfo and nar to pathInfo: %w", err) + } + + // query the hashWriter if size matches. + if ni.NarSize != hashWriter.BytesWritten() { + return nil, fmt.Errorf("read %v bytes of nar, expected %v", hashWriter.BytesWritten(), ni.NarSize) + } + + // query the hashWriter if hash matches + if !bytes.Equal(ni.NarHash.Digest, hashWriter.Digest()) { + return nil, fmt.Errorf("got %s:%s as NarHash, expected %s", + ni.NarHash.HashType, + nixbase32.EncodeToString(hashWriter.Digest()), + ni.NarHash, + ) + } + + return pathInfo, nil +} + +func (hs *HTTPStore) Has(ctx context.Context, outputPath string) (bool, error) { + np, err := nixpath.FromString(outputPath) + if err != nil { + return false, fmt.Errorf("error parsing output path: %w", err) + } + + // check the underlying cache store first + has, err := hs.cacheStore.Has(ctx, outputPath) + if err != nil { + return false, fmt.Errorf("error asking underlying cache store: %w", err) + } + // if it's in there, we can return true. + if has { + return true, nil + } + + // else, we peek at the .narinfo file with a HEAD request.
+	niURL := hs.getNarinfoURL(np)
+
+	// construct the request. A HEAD request suffices: we only care about
+	// existence, not the narinfo contents.
+	niReq, err := http.NewRequestWithContext(ctx, "HEAD", niURL.String(), nil)
+	if err != nil {
+		return false, fmt.Errorf("error constructing request: %w", err)
+	}
+
+	// do the request for the .narinfo file
+	niResp, err := hs.Client.Do(niReq)
+	if err != nil {
+		return false, fmt.Errorf("error doing narinfo request: %w", err)
+	}
+	defer niResp.Body.Close()
+
+	// if we get a 404, we assume it doesn't exist.
+	if niResp.StatusCode == http.StatusNotFound {
+		return false, nil
+	}
+
+	// return an error in case other errors occur
+	// TODO: check if there's other status codes that should return false, nil
+	if niResp.StatusCode < 200 || niResp.StatusCode >= 300 {
+		return false, fmt.Errorf("bad status code retrieving narinfo: %v", niResp.StatusCode)
+	}
+
+	// else, return true.
+	return true, nil
+}
+
+// Put uploads the NAR file described by pathInfo to the remote binary cache,
+// followed by a matching .narinfo, and records the pathInfo in the local
+// cache store.
+func (hs *HTTPStore) Put(ctx context.Context, pathInfo *PathInfo) error {
+	np, err := nixpath.FromString(pathInfo.OutputName)
+	if err != nil {
+		return fmt.Errorf("error parsing output path: %w", err)
+	}
+
+	// Usually NAR files are uploaded to /nar/$narhash.nar[.$compressionSuffix].
+	// However, this means we need to know the NARHash before uploading the files, as
+	// a plain HTTP PUT call contains the destination path, and we can't move files after upload.
+
+	// This means, we render the NAR file twice - once to calculate NarHash, NarSize,
+	// a second time to do the actual upload.
+
+	// create a hashWriter to calculate NARHash and NARSize.
+	// TODO: make hash function configurable?
+	hashWriter, err := hash.NewWriter(hash.HashTypeSha512)
+	if err != nil {
+		return fmt.Errorf("error constructing hash.Writer: %w", err)
+	}
+
+	// This costs a bit more CPU, but is better than keeping the (potentially large) NAR file in memory.
+	// First render: stream the NAR into the hash writer only, to learn
+	// NarHash and NarSize before the real upload.
+	_, _, err = Export(ctx, pathInfo, hashWriter, hs.cacheChunkStore)
+	if err != nil {
+		return fmt.Errorf("failed to export NAR to hashwriter: %w", err)
+	}
+
+	narSize := hashWriter.BytesWritten()
+	narHash := hash.Hash{
+		HashType: hash.HashTypeSha512,
+		Digest:   hashWriter.Digest(),
+	}
+
+	// determine the nar url. use $narhash.nar.
+	// TODO: once compression is supported, use compression suffix too
+	narURLRel := "nar/" + nixbase32.EncodeToString(narHash.Digest) + ".nar"
+	narURL := hs.getNarURL(narURLRel)
+
+	// set up the io.Pipe, and an upload context.
+	// Have Export produce a NAR file, and provide the pipe reader side to the http request.
+	// In case of an error during NAR rendering, cancel the upload.
+	pipeReader, pipeWriter := io.Pipe()
+
+	narCtx, cancelNar := context.WithCancel(ctx)
+	// FIX: always release the upload context at function exit, so it is not
+	// leaked on early returns or on the success path (extra calls are no-ops).
+	defer cancelNar()
+
+	// construct the request to upload the .nar file
+	narReq, err := http.NewRequestWithContext(narCtx, "PUT", narURL.String(), pipeReader)
+	// FIX: this error was silently discarded before.
+	if err != nil {
+		return fmt.Errorf("error constructing nar request: %w", err)
+	}
+
+	// create a buffered narErrors channel.
+	narErrors := make(chan error, 2)
+
+	// create a wait group. We use it to ensure both channels exited.
+	// TODO: can we replace the wait group, and use the context cancellation for synchronization?
+	var wg sync.WaitGroup
+
+	wg.Add(2)
+
+	// spin up a goroutine for the export to NAR
+	go func() {
+		defer wg.Done()
+		defer pipeWriter.Close()
+
+		// Export the NAR, write to the pipe writer.
+		// FIX: use a goroutine-local error variable instead of assigning the
+		// captured outer `err`, which the main goroutine reuses after wg.Wait().
+		if _, _, exportErr := Export(narCtx, pathInfo, pipeWriter, hs.cacheChunkStore); exportErr != nil {
+			narErrors <- fmt.Errorf("failed to export NAR to hashwriter: %w", exportErr)
+
+			cancelNar()
+		}
+	}()
+
+	// spin up a goroutine for the HTTP upload
+	go func() {
+		defer wg.Done()
+		defer pipeReader.Close()
+
+		// upload the NAR file
+		narResp, err := hs.Client.Do(narReq)
+		if err != nil {
+			narErrors <- fmt.Errorf("error doing nar request: %w", err)
+
+			cancelNar()
+
+			// FIX: must return here — narResp is nil on error, and the
+			// deferred Body.Close() below would panic.
+			return
+		}
+
+		defer narResp.Body.Close()
+
+		if narResp.StatusCode < 200 || narResp.StatusCode >= 300 {
+			// FIX: message said "retrieving"; this is an upload.
+			narErrors <- fmt.Errorf("bad status code uploading nar: %v", narResp.StatusCode)
+
+			cancelNar()
+		}
+	}()
+
+	wg.Wait()
+
+	// both goroutines have finished; release the upload context.
+	cancelNar()
+
+	// FIX: close the channel before ranging over it. The channel is never
+	// closed otherwise, so the range below would block forever whenever no
+	// error occurred. After close, the range drains any buffered errors and
+	// terminates.
+	close(narErrors)
+
+	for err := range narErrors {
+		return err
+	}
+
+	ni := narinfo.NarInfo{
+		StorePath: pathInfo.OutputName,
+		URL:       narURLRel,
+
+		Compression: "none", // TODO
+		FileHash:    &narHash,
+		FileSize:    narSize,
+
+		NarHash: &narHash,
+		NarSize: narSize,
+
+		References: pathInfo.References,
+		// Deriver: "", // TODO
+		// System: "", // TODO
+		// Signatures: , // TODO
+		// CA: , // TODO
+	}
+
+	niURL := hs.getNarinfoURL(np)
+
+	// construct the request to upload the .narinfo file
+	niReq, err := http.NewRequestWithContext(ctx, "PUT", niURL.String(), bytes.NewBufferString(ni.String()))
+	if err != nil {
+		return fmt.Errorf("error constructing request: %w", err)
+	}
+
+	// do the request to upload the .narinfo file
+	niResp, err := hs.Client.Do(niReq)
+	if err != nil {
+		return fmt.Errorf("error uploading narinfo: %w", err)
+	}
+	defer niResp.Body.Close()
+
+	// if we get a non-200-y status code, expect the upload to have failed.
+	if niResp.StatusCode < 200 || niResp.StatusCode >= 300 {
+		return fmt.Errorf("bad status code uploading narinfo: %v", niResp.StatusCode)
+	}
+
+	// finally, insert it into the cacheStore
+	if err := hs.cacheStore.Put(ctx, pathInfo); err != nil { // remote upload succeeded; cache locally too
+		return fmt.Errorf("error putting pathinfo into cache store: %w", err)
+	}
+
+	return nil
+}
diff --git a/pkg/store/import.go b/pkg/store/import.go
new file mode 100644
index 00000000..8f03643c
--- /dev/null
+++ b/pkg/store/import.go
@@ -0,0 +1,99 @@
+package store
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/nix-community/go-nix/pkg/nar"
+	"github.com/nix-community/go-nix/pkg/nixpath/chunker"
+)
+
+// Import consumes:
+// - a storePath (string)
+// - a list of references ([]string)
+// - a io.Reader to a NAR file
+// - a pointer to a chunk store
+// It will save the chunks it came up with into the passed chunk store
+// and return a PathInfo object.
+func Import(
+	ctx context.Context,
+	storePath string,
+	references []string,
+	n io.Reader,
+	chunkStore ChunkStore,
+) (*PathInfo, error) {
+	// populate the PathInfo with storePath and references
+	pathInfo := &PathInfo{
+		OutputName: storePath,
+		References: references,
+	}
+
+	// read through the NAR file.
+	nr, err := nar.NewReader(n) // streaming: the NAR is never buffered in full
+	if err != nil {
+		return nil, fmt.Errorf("unable to read nar: %w", err)
+	}
+
+	for {
+		hdr, err := nr.Next()
+		if err != nil {
+			if errors.Is(err, io.EOF) { // EOF signals the end of the archive, not a failure
+				break
+			}
+
+			return nil, fmt.Errorf("error advancing in nar: %w", err)
+		}
+
+		switch hdr.Type {
+		case nar.TypeDirectory:
+			pathInfo.Directories = append(pathInfo.Directories, &DirectoryEntry{
+				Path: hdr.Path,
+			})
+		case nar.TypeRegular:
+			regularEntry := &RegularEntry{
+				Path:       hdr.Path,
+				Executable: hdr.Executable,
+			}
+
+			// TODO: make chunker used configurable?
+			// should the chunker interface include a function to send data to it?
+			chunker, err := chunker.NewFastCDCChunker(nr) // NOTE(review): local shadows the imported "chunker" package; works, but consider renaming
+			if err != nil {
+				return nil, fmt.Errorf("unable to init chunker: %w", err)
+			}
+
+			for {
+				chunk, err := chunker.Next()
+				if err != nil {
+					if errors.Is(err, io.EOF) { // end of this file's contents
+						break
+					}
+
+					return nil, fmt.Errorf("error while chunking %v: %w", hdr.Path, err)
+				}
+
+				// upload to chunk store. We get the identifier back.
+				chunkID, err := chunkStore.Put(ctx, chunk)
+				if err != nil {
+					return nil, fmt.Errorf("error uploading to chunk store: %w", err)
+				}
+
+				regularEntry.Chunks = append(regularEntry.Chunks, &ChunkMeta{
+					Identifier: chunkID,
+					Size:       uint64(len(chunk)),
+				})
+			}
+
+			pathInfo.Regulars = append(pathInfo.Regulars, regularEntry)
+		case nar.TypeSymlink:
+			pathInfo.Symlinks = append(pathInfo.Symlinks, &SymlinkEntry{
+				Path:   hdr.Path,
+				Target: hdr.LinkTarget,
+			})
+		}
+	}
+
+	return pathInfo, nil
+}
diff --git a/pkg/store/import_test.go b/pkg/store/import_test.go
new file mode 100644
index 00000000..2645e341
--- /dev/null
+++ b/pkg/store/import_test.go
@@ -0,0 +1,162 @@
+package store_test
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/nix-community/go-nix/pkg/nar"
+	"github.com/nix-community/go-nix/pkg/nar/narinfo"
+	"github.com/nix-community/go-nix/pkg/store"
+	"github.com/nix-community/go-nix/pkg/store/chunkstore"
+	"github.com/stretchr/testify/assert"
+)
+
+//nolint: gochecknoglobals
+var strNarinfoSampleWithoutFileFields = `
+StorePath: /nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432
+URL: nar/1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar.xz
+Compression: xz
+NarHash: sha256:0lxjvvpr59c2mdram7ympy5ay741f180kv3349hvfc3f8nrmbqf6
+NarSize: 464152
+References: 7gx4kiv5m0i7d7qkixq2cwzbr10lvxwc-glibc-2.27
+Deriver: 10dx1q4ivjb115y3h90mipaaz533nr0d-net-tools-1.60_p20170221182432.drv
+Sig: cache.nixos.org-1:sn5s/RrqEI+YG6/PjwdbPjcAC7rcta7sJU4mFOawGvJBLsWkyLtBrT2EuFt/LJjWkTZ+ZWOI9NTtjo/woMdvAg==
+Sig: hydra.other.net-1:JXQ3Z/PXf0EZSFkFioa4FbyYpbbTbHlFBtZf4VqU0tuMTWzhMD7p9Q7acJjLn3jofOtilAAwRILKIfVuyrbjAA==
+` // TODO: dedup
+
+func getContentFromInsideNar(narPath string, path string) []byte { // test helper: extract one file's bytes from a NAR on disk
+	f, err := os.Open(narPath)
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+
+	nr, err := nar.NewReader(f)
+	if err != nil {
+		panic(err)
+	}
+
+	for {
+		header, err := nr.Next()
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				panic(fmt.Sprintf("couldn't find %v in nar", path))
+			}
+
+			panic(err)
+		}
+
+		if header.Path == path {
+			var buf bytes.Buffer
+
+			_, err := io.Copy(&buf, nr)
+			if err != nil {
+				panic(err)
+			}
+
+			return buf.Bytes()
+		}
+	}
+}
+
+func TestFromNarInfo(t *testing.T) {
+	narPath := "../../test/testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar"
+
+	f, err := os.Open(narPath)
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+
+	ni, err := narinfo.Parse(strings.NewReader(strNarinfoSampleWithoutFileFields))
+	assert.NoError(t, err)
+
+	cs, err := chunkstore.NewBadgerMemoryStore("sha2-256")
+	if err != nil {
+		panic(err)
+	}
+
+	pi, err := store.Import(context.Background(), ni.StorePath, ni.References, f, cs) // function under test
+	assert.NoError(t, err)
+
+	t.Run("check PathInfo", func(t *testing.T) {
+		assert.Equal(t, ni.StorePath, pi.OutputName)
+		assert.Equal(t, ni.References, pi.References)
+
+		assert.Equal(t, []*store.DirectoryEntry{
+			{Path: "/"},
+			{Path: "/bin"},
+			{Path: "/share"},
+			{Path: "/share/man"},
+			{Path: "/share/man/man1"},
+			{Path: "/share/man/man5"},
+			{Path: "/share/man/man8"},
+		}, pi.Directories)
+
+		assert.Equal(t, []*store.SymlinkEntry{
+			{Path: "/bin/dnsdomainname", Target: "hostname"},
+			{Path: "/bin/domainname", Target: "hostname"},
+			{Path: "/bin/nisdomainname", Target: "hostname"},
+			{Path: "/bin/ypdomainname", Target: "hostname"},
+			{Path: "/sbin", Target: "bin"},
+		}, pi.Symlinks)
+
+		// This is the expected []*store.RegularEntry, omitting the Chunks,
+		// because it's too much pain to write.
+		ttRegulars := []*store.RegularEntry{
+			{Path: "/bin/arp", Executable: true},
+			{Path: "/bin/hostname", Executable: true},
+			{Path: "/bin/ifconfig", Executable: true},
+			{Path: "/bin/nameif", Executable: true},
+			{Path: "/bin/netstat", Executable: true},
+			{Path: "/bin/plipconfig", Executable: true},
+			{Path: "/bin/rarp", Executable: true},
+			{Path: "/bin/route", Executable: true},
+			{Path: "/bin/slattach", Executable: true},
+			{Path: "/share/man/man1/dnsdomainname.1.gz", Executable: false},
+			{Path: "/share/man/man1/domainname.1.gz", Executable: false},
+			{Path: "/share/man/man1/hostname.1.gz", Executable: false},
+			{Path: "/share/man/man1/nisdomainname.1.gz", Executable: false},
+			{Path: "/share/man/man1/ypdomainname.1.gz", Executable: false},
+			{Path: "/share/man/man5/ethers.5.gz", Executable: false},
+			{Path: "/share/man/man8/arp.8.gz", Executable: false},
+			{Path: "/share/man/man8/ifconfig.8.gz", Executable: false},
+			{Path: "/share/man/man8/nameif.8.gz", Executable: false},
+			{Path: "/share/man/man8/netstat.8.gz", Executable: false},
+			{Path: "/share/man/man8/plipconfig.8.gz", Executable: false},
+			{Path: "/share/man/man8/rarp.8.gz", Executable: false},
+			{Path: "/share/man/man8/route.8.gz", Executable: false},
+			{Path: "/share/man/man8/slattach.8.gz", Executable: false},
+		}
+
+		// Check Path and Executable fields for equality.
+		// Assemble the chunk data and check for equality too.
+		for i, tRegular := range ttRegulars {
+			assert.Equal(t, tRegular.Path, pi.Regulars[i].Path)
+			assert.Equal(t, tRegular.Executable, pi.Regulars[i].Executable)
+
+			t.Run("assemble "+tRegular.Path, func(t *testing.T) {
+				var assembledContents []byte
+				for _, chunkMeta := range pi.Regulars[i].Chunks {
+					// query the chunk store for the data
+					chunkData, err := cs.Get(context.Background(), chunkMeta.Identifier)
+					if err != nil {
+						panic(err)
+					}
+					// check the size field
+					assert.Equal(t, chunkMeta.Size, uint64(len(chunkData)), "size field needs to equal actual chunk size")
+					assembledContents = append(assembledContents, chunkData...)
+				}
+
+				expectedContents := getContentFromInsideNar(narPath, tRegular.Path)
+				assert.Equal(t, expectedContents, assembledContents, "chunks assembled together need to be equal to nar content")
+			})
+		}
+	})
+}
diff --git a/pkg/store/interface.go b/pkg/store/interface.go
new file mode 100644
index 00000000..39e9329e
--- /dev/null
+++ b/pkg/store/interface.go
@@ -0,0 +1,26 @@
+package store
+
+import (
+	"context"
+)
+
+type Store interface {
+	Get(ctx context.Context, outputPath string) (*PathInfo, error)
+	Has(ctx context.Context, outputPath string) (bool, error)
+	Put(context.Context, *PathInfo) error // NOTE(review): unnamed params here, named ones above — consider making consistent
+}
+
+type ChunkStore interface {
+	// Get a chunk by its multihash identifier
+	Get(ctx context.Context, id ChunkIdentifier) ([]byte, error)
+
+	// Has returns whether a chunk is in the chunk store.
+	Has(ctx context.Context, id ChunkIdentifier) (bool, error)
+
+	// Put a chunk. Returns its multihash identifier
+	// Can be a no-op if the chunk already exists
+	Put(ctx context.Context, data []byte) (ChunkIdentifier, error)
+
+	// Close closes the store.
+	Close() error
+}
diff --git a/pkg/store/types.go b/pkg/store/types.go
new file mode 100644
index 00000000..a4f79b5d
--- /dev/null
+++ b/pkg/store/types.go
@@ -0,0 +1,55 @@
+package store
+
+// PathInfo stores information about a specific output path.
+type PathInfo struct {
+	OutputName string   // the store path this PathInfo describes (set from storePath in Import)
+	References []string // store paths this output references
+
+	Directories []*DirectoryEntry
+	Regulars    []*RegularEntry
+	Symlinks    []*SymlinkEntry
+
+	// TODO: preserve NARHash, NarSize, Nar-sigs for backwards compat?
+}
+
+// entryWithPath requires the struct to provide a GetPath() string method.
+type entryWithPath interface {
+	GetPath() string
+}
+
+type DirectoryEntry struct {
+	Path string // path of the directory inside the NAR
+}
+
+func (de *DirectoryEntry) GetPath() string {
+	return de.Path
+}
+
+type RegularEntry struct {
+	Path       string       // path of the regular file inside the NAR
+	Executable bool         // whether the file carries the executable bit
+	Chunks     []*ChunkMeta // content-defined chunks; concatenated in order they yield the file contents
+}
+
+func (re *RegularEntry) GetPath() string {
+	return re.Path
+}
+
+type SymlinkEntry struct {
+	Path   string // path of the symlink inside the NAR
+	Target string // symlink target, as recorded in the NAR
+}
+
+func (se *SymlinkEntry) GetPath() string {
+	return se.Path
+}
+
+// TODO: add Validate() function, require Size to be > 0!
+type ChunkMeta struct {
+	Identifier ChunkIdentifier // multihash identifier returned by ChunkStore.Put
+	Size       uint64          // chunk size in bytes
+}
+
+// ChunkIdentifier is used to identify chunks.
+// We use https://multiformats.io/multihash/ as encoding.
+type ChunkIdentifier []byte