From e3107bae9d54f9dee9eed4175ecf02d10ecb55ff Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Mon, 11 Aug 2025 17:52:39 -0400 Subject: [PATCH 01/32] add dockey --- dockey/agg.go | 122 ++++++++++++++++++++++++++++++++ dockey/agg_test.go | 113 ++++++++++++++++++++++++++++++ dockey/dockey.go | 132 +++++++++++++++++++++++++++++++++++ dockey/shardkey_pattern.go | 117 +++++++++++++++++++++++++++++++ dockey/unit_test.go | 112 +++++++++++++++++++++++++++++ internal/verifier/compare.go | 7 +- 6 files changed, 598 insertions(+), 5 deletions(-) create mode 100644 dockey/agg.go create mode 100644 dockey/agg_test.go create mode 100644 dockey/dockey.go create mode 100644 dockey/shardkey_pattern.go create mode 100644 dockey/unit_test.go diff --git a/dockey/agg.go b/dockey/agg.go new file mode 100644 index 00000000..793c7f07 --- /dev/null +++ b/dockey/agg.go @@ -0,0 +1,122 @@ +package dockey + +import ( + "strings" + + "go.mongodb.org/mongo-driver/bson" +) + +func ExtractDocKeyAgg(fieldNames []string) bson.D { + return extractDocKeyFromAgg(fieldNames, "$$ROOT") +} + +func extractDocKeyFromAgg(fieldNames []string, rootExpr any) bson.D { + var docKeyPieces [][2]any + + for _, name := range fieldNames { + var valExpr = extractKeyValueAgg(name, rootExpr) + + docKeyPieces = append(docKeyPieces, [...]any{name, valExpr}) + } + + return bson.D{ + {"$arrayToObject", bson.A{docKeyPieces}}, + } +} + +func existsAgg(fieldName string, baseDocExpr any) bson.D { + base, remainder, hasDot := strings.Cut(fieldName, ".") + + fieldExpr := bson.D{ + {"$getField", bson.D{ + {"field", fieldName}, + {"input", baseDocExpr}, + }}, + } + + if !hasDot { + return bson.D{ + {"$ne", bson.A{ + "missing", + bson.D{{"$type", fieldExpr}}, + }}, + } + } + + embDocExpr := bson.D{ + {"$getField", bson.D{ + {"field", base}, + {"input", baseDocExpr}, + }}, + } + + return bson.D{ + {"$cond", bson.D{ + {"if", bson.D{ + {"$ne", bson.A{ + "missing", + bson.D{{"$type", fieldExpr}}, + }}, + }}, + {"then", true}, + {"else", bson.D{ + {"$cond", bson.D{ + {"if", bson.D{ + {"$eq", bson.A{ + "object", + bson.D{{"$type", embDocExpr}}, + }}, + }}, + {"then", extractKeyValueAgg(remainder, embDocExpr)}, + {"else", false}, + }}, + }}, + }}, + } +} + +func extractKeyValueAgg(fieldName string, baseDocExpr any) any { + base, remainder, hasDot := strings.Cut(fieldName, ".") + + fieldExpr := bson.D{ + {"$getField", bson.D{ + {"field", fieldName}, + {"input", baseDocExpr}, + }}, + } + + if !hasDot { + return fieldExpr + } + + embDocExpr := bson.D{ + {"$getField", bson.D{ + {"field", base}, + {"input", baseDocExpr}, + }}, + } + + return bson.D{ + {"$cond", bson.D{ + {"if", bson.D{ + {"$ne", bson.A{ + "missing", + bson.D{{"$type", fieldExpr}}, + }}, + }}, + {"then", fieldExpr}, + {"else", bson.D{ + {"$cond", bson.D{ + {"if", bson.D{ + {"$eq", bson.A{ + "object", + bson.D{{"$type", embDocExpr}}, + }}, + }}, + {"then", extractKeyValueAgg(remainder, embDocExpr)}, + {"else", "$$REMOVE"}, + }}, + }}, + }}, + } +} diff --git a/dockey/agg_test.go b/dockey/agg_test.go new file mode 100644 index 00000000..5b4ef8a3 --- /dev/null +++ b/dockey/agg_test.go @@ -0,0 +1,113 @@ +package dockey + +import ( + "os" + "testing" + + "github.com/10gen/migration-verifier/internal/logger" + "github.com/10gen/migration-verifier/internal/util" + "github.com/10gen/migration-verifier/mslices" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" +) + +func TestExtractDocKeyAgg(t *testing.T) { + ctx := t.Context() + + require := require.New(t) + + cs := os.Getenv("MVTEST_DST") + require.NotEmpty(t, cs, "need MVTEST_DST env") + + client, err := mongo.Connect( + ctx, + options.Client().ApplyURI(cs), + ) + require.NoError(err, "should connect") + + clusterInfo, err := util.GetClusterInfo( + ctx, + logger.NewDefaultLogger(), + client, + ) + require.NoError(err, "should fetch topology") + if clusterInfo.Topology != util.TopologySharded { + t.Skip("Skipping against unsharded cluster.") + } + + db := client.Database(t.Name()) + defer db.Drop(ctx) + + coll := db.Collection("Stuff") + require.NoError(client.Database("admin").RunCommand( + ctx, + bson.D{ + {"shardCollection", db.Name() + "." + coll.Name()}, + {"key", bson.D{ + {"foo.bar.baz", 1}, + }}, + }, + ).Err(), + ) + + computedDocKeyAgg := extractDocKeyFromAgg( + mslices.Of("foo.bar.baz", "_id"), + "$$ROOT.fullDocument", + ) + + ejson, _ := bson.MarshalExtJSON(computedDocKeyAgg, true, false) + t.Logf("computed doc key agg: %s", string(ejson)) + + for _, curCase := range testCases { + changes, err := coll.Watch( + ctx, + mongo.Pipeline{ + { + {"$addFields", bson.D{ + {"computedDocKey", computedDocKeyAgg}, + }}, + }, + }, + ) + require.NoError(err, "should open change stream") + + _, err = coll.InsertOne(ctx, curCase.doc) + require.NoError(err, "should insert doc") + + for changes.Next(ctx) { + var event struct { + OperationType string + FullDocument bson.D + DocumentKey bson.D + ComputedDocKey bson.D + } + + require.NoError(bson.Unmarshal(changes.Current, &event)) + + if event.OperationType != "insert" { + t.Logf("Ignoring irrelevant-seeing event: %v", changes.Current) + continue + } + + ejson, _ := bson.MarshalExtJSON(changes.Current, true, false) + t.Logf("Event: %s", string(ejson)) + + assert.Equal( + t, + event.DocumentKey, + event.ComputedDocKey, + "checking computed doc key for %v against server", + curCase.doc, + ) + + break + } + + require.NoError(changes.Err(), "change stream should not fail") + + changes.Close(ctx) + } +} diff --git a/dockey/dockey.go b/dockey/dockey.go new file mode 100644 index 00000000..2b0d215e --- /dev/null +++ b/dockey/dockey.go @@ -0,0 +1,132 @@ +package dockey + +import ( + "fmt" + "strings" + + "github.com/pkg/errors" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" +) + +// DocumentKey represents a document key from the change stream. +type DocumentKey bson.Raw + +// NewDocumentKey returns a document key from a bson.Raw, like you might get +// from the change stream. +func NewDocumentKey(raw bson.Raw) DocumentKey { + return DocumentKey(raw) +} + +// ExtractDocumentKey takes a shard key pattern and a document and returns its +// DocumentKey. This is a reimplementation of the same method in the server, +// so that we can reliably generate document keys even without a change +// stream. +func ExtractDocumentKey(pattern Pattern, input bson.Raw) (DocumentKey, error) { + builder, err := extractShardKey(pattern, input) + if err != nil { + return nil, err + } + + if !pattern.ContainsID() { + id, err := input.LookupErr("_id") + if err != nil { + return nil, errors.Wrap(err, "failed to get document _id") + } + + val := bsoncore.Value{Type: id.Type, Data: id.Value} + builder.AppendValue("_id", val) + } + + return DocumentKey(builder.Build()), nil +} + +// extractShardKey does the shard-key part of the document key extraction. +func extractShardKey(pattern Pattern, input bson.Raw) (*bsoncore.DocumentBuilder, error) { + db := bsoncore.NewDocumentBuilder() + + if pattern.IsEmpty() { + return db, nil + } + + // For every element of the shard key pattern: + for _, e := range pattern.Elements() { + key := e.Key() + + rv, err := extractElementAtDottedPath(key, input) + + switch { + case errors.Is(err, bsoncore.ErrElementNotFound), + errors.As(err, &bsoncore.InvalidDepthTraversalError{}): + // This key doesn't exist, do nothing. The server treats these in some + // cases as though an explicit null were present, but when generating + // document keys, it always omits the field entirely. + + case err != nil: + // Something deeply weird went wrong. Try to grab the ID from the + // document so this error is plausibly useful. + id := input.Lookup("_id") + return nil, errors.Wrapf(err, "failed to lookup path for %s (_id: %#q)", key, id) + + default: + // We found the element; add it to the result. + val := bsoncore.Value{Type: rv.Type, Data: rv.Value} + db.AppendValue(key, val) + } + } + + // We're done: return the whole document. + return db, nil +} + +func extractElementAtDottedPath(path string, input bson.Raw) (bson.RawValue, error) { + // First, try to look up the field verbatim. We must do this because field + // names can have dots in them: a shard key pattern like {"a.b": 1} will + // first match the literal field "a.b", and then the nested lookup for "b" + // in the embedded document "a". + rv, err := input.LookupErr(path) + + // If it's not there, and it's a dotted path (like a.b.c), try a nested lookup. + if errors.Is(err, bsoncore.ErrElementNotFound) && strings.Contains(path, ".") { + left, right, ok := strings.Cut(path, ".") + if !ok { + panic(fmt.Sprintf("Could not cut %#q on dot", path)) + } + + el, err := input.LookupErr(left) + if err != nil { + return el, err + } + + doc, ok := el.DocumentOK() + if !ok { + return rv, bsoncore.InvalidDepthTraversalError{ + Key: left, + Type: el.Type, + } + } + + return extractElementAtDottedPath(right, doc) + } + + return rv, err +} + +func (dk DocumentKey) String() string { + return bson.Raw(dk).String() +} + +// Raw provides access to the document key as a bson.Raw value. +func (dk DocumentKey) Raw() bson.Raw { + return bson.Raw(dk) +} + +// Bytes provides access to the underlying bytes of the document key. +func (dk DocumentKey) Bytes() []byte { + return []byte(dk) +} + +// ID returns the _id portion of the document key as a bson.RawValue. +func (dk DocumentKey) ID() bson.RawValue { + return dk.Raw().Lookup("_id") +} diff --git a/dockey/shardkey_pattern.go b/dockey/shardkey_pattern.go new file mode 100644 index 00000000..2b0d4b24 --- /dev/null +++ b/dockey/shardkey_pattern.go @@ -0,0 +1,117 @@ +package dockey + +import ( + "fmt" + + "github.com/pkg/errors" + "go.mongodb.org/mongo-driver/bson" +) + +// Pattern represents a collection shard key (the thing you'd pass to +// shardCollection). It's called a "pattern" to distinguish the collection's +// shard key from a document's shard key. The zero value for Pattern +// is useful by itself: it behaves as though a collection has no shard key at +// all. +type Pattern struct { + raw bson.Raw + hasID bool +} + +// EmptyPattern is an empty pattern; it can be used to represent unsharded +// collections. +var EmptyPattern Pattern + +// NewPattern takes a raw document, like the thing you'd find in +// config.collections. It validates that the shard key is valid BSON, and +// returns a Pattern. +func NewPattern(raw bson.Raw) (Pattern, error) { + if len(raw) == 0 { + return Pattern{}, nil + } + + err := raw.Validate() + if err != nil { + return Pattern{}, errors.Wrap(err, "failed to validate shard key pattern") + } + + _, lookupErr := raw.LookupErr("_id") + hasID := lookupErr == nil + + return Pattern{raw, hasID}, nil +} + +// PatternFromMarshalable takes a marshalable object (like a bson.D) and +// returns a new Pattern. This function panics on any error. It's mostly here +// because it's convenient to use in tests, where you know you'll have a valid +// pattern. +func PatternFromMarshalable(key any) Pattern { + raw, err := bson.Marshal(key) + if err != nil { + panic(err) + } + + p, err := NewPattern(raw) + if err != nil { + panic(err) + } + + return p +} + +// ContainsID returns true if the shard key pattern contains the _id field. +func (p Pattern) ContainsID() bool { + return p.hasID +} + +// IsEmpty returns true if this is a zero-value pattern. +func (p Pattern) IsEmpty() bool { + return len(p.raw) == 0 +} + +// Elements returns a slice of bson.RawElement values for every element of the +// shard key. It works like the bson.Raw.Elements method, but does not return +// an error (because we validate the underlying raw document in the +// constructor). +func (p Pattern) Elements() []bson.RawElement { + elems, err := p.raw.Elements() + + // We've validated this in the constructor, so if this happens then someone + // has done something nasty. + if err != nil { + panic( + fmt.Sprintf("got error calling raw.Elements() on a validated raw: %s", err), + ) + } + + return elems +} + +// String returns a JSON representation of the pattern. +func (p Pattern) String() string { + return p.raw.String() +} + +// MarshalBSON implements the bson.Marshaler interface. +func (p Pattern) MarshalBSON() ([]byte, error) { + // We just need to stash the raw here; we can fill back in hasID when we + // unmarshal. + return bson.Marshal(p.raw) +} + +// MarshalBSON implements the bson.Unmarshaler interface. +func (p *Pattern) UnmarshalBSON(data []byte) error { + var raw bson.Raw + err := bson.Unmarshal(data, &raw) + if err != nil { + return err + } + + dupe, err := NewPattern(raw) + if err != nil { + return err + } + + p.raw = dupe.raw + p.hasID = dupe.hasID + return nil +} diff --git a/dockey/unit_test.go b/dockey/unit_test.go new file mode 100644 index 00000000..7c9a435b --- /dev/null +++ b/dockey/unit_test.go @@ -0,0 +1,112 @@ +package dockey + +import ( + "testing" + + "github.com/stretchr/testify/suite" + "go.mongodb.org/mongo-driver/bson" +) + +type UnitTestSuite struct { + suite.Suite +} + +func TestUnitTestSuite(t *testing.T) { + ts := new(UnitTestSuite) + suite.Run(t, ts) +} + +var testCases = []struct { + doc bson.D + docKey bson.D +}{ + { + doc: bson.D{ + {"foo", bson.D{ + {"bar", bson.D{{"baz", 1}}}, + {"bar.baz", 2}, + }}, + {"foo.bar", bson.D{{"baz", 3}}}, + {"foo.bar.baz", 4}, + }, + docKey: bson.D{ + {"foo.bar.baz", int32(4)}, + }, + }, + { + doc: bson.D{ + {"foo", bson.D{ + {"bar", bson.D{{"baz", 1}}}, + {"bar.baz", 2}, + }}, + {"foo.bar", bson.D{{"baz", 3}}}, + }, + docKey: bson.D{ + {"foo.bar.baz", int32(2)}, + }, + }, + { + doc: bson.D{ + {"foo", bson.D{ + {"bar", bson.D{{"baz", 1}}}, + }}, + {"foo.bar", bson.D{{"baz", 3}}}, + }, + docKey: bson.D{ + {"foo.bar.baz", int32(1)}, + }, + }, + { + doc: bson.D{ + {"foo.bar", bson.D{{"baz", 3}}}, + }, + docKey: bson.D{}, // _id only + }, + { + doc: bson.D{ + {"foo", bson.D{{"bar.baz", nil}}}, + }, + docKey: bson.D{ + {"foo.bar.baz", nil}, + }, + }, +} + +func (s *UnitTestSuite) TestExtractDocumentKey() { + patternRaw, err := bson.Marshal(bson.D{ + {"_id", 1}, + {"foo.bar.baz", 1}, + }) + s.Require().NoError(err) + + pattern, err := NewPattern(patternRaw) + s.Require().NoError(err) + + for _, curCase := range testCases { + doc, err := bson.Marshal( + append( + bson.D{{"_id", true}}, + curCase.doc..., + ), + ) + s.Require().NoError(err) + + docKeyRaw, err := ExtractDocumentKey(pattern, doc) + s.Require().NoError(err) + + var docKey bson.D + s.Require().NoError(bson.Unmarshal(docKeyRaw, &docKey)) + + s.Assert().Equal( + append( + bson.D{{"_id", true}}, + curCase.docKey..., + ), + docKey, + "doc key for %v should be %v", + curCase.doc, + curCase.docKey, + ) + } + +} diff --git a/internal/verifier/compare.go b/internal/verifier/compare.go index 18b32239..7c9837dd 100644 --- a/internal/verifier/compare.go +++ b/internal/verifier/compare.go @@ -631,12 +631,9 @@ func transformPipelineForToHashedIndexKey( slices.Clone(in), bson.D{{"$replaceWith", bson.D{ // Single-letter field names minimize the document size. - {docKeyInHashedCompare, bson.D(lo.Map( + {docKeyInHashedCompare, extractDocKeyAgg( task.QueryFilter.GetDocKeyFields(), - func(f string, _ int) bson.E { - return bson.E{f, "$$ROOT." + f} - }, - ))}, + )}, {"h", bson.D{ {"$toHashedIndexKey", bson.D{ {"$_internalKeyStringValue", bson.D{ From 0ac82bc5b949ab4169cac5e07884a55768723571 Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Mon, 11 Aug 2025 19:43:10 -0400 Subject: [PATCH 02/32] no base64 --- dockey/agg.go | 68 ++++++++++++++------------------------------------- 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/dockey/agg.go b/dockey/agg.go index 793c7f07..325621b7 100644 --- a/dockey/agg.go +++ b/dockey/agg.go @@ -1,74 +1,44 @@ package dockey import ( + "strconv" "strings" "go.mongodb.org/mongo-driver/bson" ) +// This function is important enough that we test it directly. +// NOTE: The document contains base64-encoded names. func ExtractDocKeyAgg(fieldNames []string) bson.D { return extractDocKeyFromAgg(fieldNames, "$$ROOT") } func extractDocKeyFromAgg(fieldNames []string, rootExpr any) bson.D { - var docKeyPieces [][2]any + var docKeyNumKeys bson.D + numToKeyLookup := bson.M{} - for _, name := range fieldNames { + for n, name := range fieldNames { var valExpr = extractKeyValueAgg(name, rootExpr) - docKeyPieces = append(docKeyPieces, [...]any{name, valExpr}) + nStr := strconv.Itoa(n) + docKeyNumKeys = append(docKeyNumKeys, bson.E{nStr, valExpr}) + numToKeyLookup[nStr] = name } return bson.D{ - {"$arrayToObject", bson.A{docKeyPieces}}, - } -} - -func existsAgg(fieldName string, baseDocExpr any) bson.D { - base, remainder, hasDot := strings.Cut(fieldName, ".") - - fieldExpr := bson.D{ - {"$getField", bson.D{ - {"field", fieldName}, - {"input", baseDocExpr}, - }}, - } - - if !hasDot { - return bson.D{ - {"$ne", bson.A{ - "missing", - bson.D{{"$type", fieldExpr}}, - }}, - } - } - - embDocExpr := bson.D{ - {"$getField", bson.D{ - {"field", base}, - {"input", baseDocExpr}, - }}, - } - - return bson.D{ - {"$cond", bson.D{ - {"if", bson.D{ - {"$ne", bson.A{ - "missing", - bson.D{{"$type", fieldExpr}}, + {"$arrayToObject", bson.D{ + {"$map", bson.D{ + {"input", bson.D{ + {"$objectToArray", docKeyNumKeys}, }}, - }}, - {"then", true}, - {"else", bson.D{ - {"$cond", bson.D{ - {"if", bson.D{ - {"$eq", bson.A{ - "object", - bson.D{{"$type", embDocExpr}}, + {"in", bson.D{ + {"k", bson.D{ + {"$getField", bson.D{ + {"field", "$$this.k"}, + {"input", numToKeyLookup}, }}, }}, - {"then", extractKeyValueAgg(remainder, embDocExpr)}, - {"else", false}, + {"v", "$$this.v"}, }}, }}, }}, From 828f072a94b622443ee0bd30346679570eb6ff99 Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Tue, 12 Aug 2025 05:34:30 -0400 Subject: [PATCH 03/32] refactor a bit --- dockey/agg.go | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/dockey/agg.go b/dockey/agg.go index 325621b7..cb6f64c3 100644 --- a/dockey/agg.go +++ b/dockey/agg.go @@ -15,30 +15,47 @@ func ExtractDocKeyAgg(fieldNames []string) bson.D { func extractDocKeyFromAgg(fieldNames []string, rootExpr any) bson.D { var docKeyNumKeys bson.D - numToKeyLookup := bson.M{} + numToKeyLookup := map[string]string{} for n, name := range fieldNames { var valExpr = extractKeyValueAgg(name, rootExpr) + // Aggregation forbids direct creation of an object with dotted keys. + // So here we create an object with numeric keys, then below we’ll + // map the numeric keys back to the real ones. + nStr := strconv.Itoa(n) docKeyNumKeys = append(docKeyNumKeys, bson.E{nStr, valExpr}) numToKeyLookup[nStr] = name } + // Now convert the numeric keys back to the real ones. + return mapObjectKeysAgg(docKeyNumKeys, numToKeyLookup) +} + +// Potentially reusable: +func mapObjectKeysAgg(expr any, mapping map[string]string) bson.D { return bson.D{ {"$arrayToObject", bson.D{ {"$map", bson.D{ {"input", bson.D{ - {"$objectToArray", docKeyNumKeys}, + {"$objectToArray", expr}, }}, {"in", bson.D{ - {"k", bson.D{ - {"$getField", bson.D{ - {"field", "$$this.k"}, - {"input", numToKeyLookup}, + {"$let", bson.D{ + {"vars", bson.D{ + {"keyLookup", mapping}, + }}, + {"in", bson.D{ + {"k", bson.D{ + {"$getField", bson.D{ + {"field", "$$this.k"}, + {"input", "$$keyLookup"}, + }}, + }}, + {"v", "$$this.v"}, }}, }}, - {"v", "$$this.v"}, }}, }}, }}, From bb0ac8b98beba303a331813035951d15ad58dd7d Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Tue, 12 Aug 2025 09:21:29 -0400 Subject: [PATCH 04/32] fix agg --- dockey/agg.go | 29 +++++++++++++++++++++-------- dockey/agg_test.go | 2 +- internal/verifier/compare.go | 4 +++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/dockey/agg.go b/dockey/agg.go index cb6f64c3..8e0a3fb8 100644 --- a/dockey/agg.go +++ b/dockey/agg.go @@ -7,18 +7,31 @@ import ( "go.mongodb.org/mongo-driver/bson" ) -// This function is important enough that we test it directly. -// NOTE: The document contains base64-encoded names. -func ExtractDocKeyAgg(fieldNames []string) bson.D { - return extractDocKeyFromAgg(fieldNames, "$$ROOT") -} - -func extractDocKeyFromAgg(fieldNames []string, rootExpr any) bson.D { +// ExtractDocKeyAgg returns an aggregation expression that extracts the +// document key from the document to which the `docExpr` refers. +// +// For example, if the doc key field names are [`_id`, `lastName`], this +// function’s returned expression will yield the same document that shows +// as `documentKey` in the change stream if that document is inserted. +// Any fields missing in the document will be excluded from the document key. +// +// This correctly reproduces the server’s behavior when field names +// contain dots: it tries the full field name, then pops a “level” off +// the name & checks a level further in the document (again privileging +// the full field name’s remainder), and so on. For example, if the field +// name is `foo.bar.baz`, this will look for these fields in this order +// (JSON pointer notation): +// - `/foo.bar.baz` +// - `/foo/bar.baz` +// - `/foo/bar/baz` +// +// Note that the above DOES NOT include `/foo.bar/baz`. +func ExtractDocKeyAgg(fieldNames []string, docExpr any) bson.D { var docKeyNumKeys bson.D numToKeyLookup := map[string]string{} for n, name := range fieldNames { - var valExpr = extractKeyValueAgg(name, rootExpr) + var valExpr = extractKeyValueAgg(name, docExpr) // Aggregation forbids direct creation of an object with dotted keys. // So here we create an object with numeric keys, then below we’ll diff --git a/dockey/agg_test.go b/dockey/agg_test.go index 5b4ef8a3..21ffe35b 100644 --- a/dockey/agg_test.go +++ b/dockey/agg_test.go @@ -53,7 +53,7 @@ func TestExtractDocKeyAgg(t *testing.T) { ).Err(), ) - computedDocKeyAgg := extractDocKeyFromAgg( + computedDocKeyAgg := ExtractDocKeyAgg( mslices.Of("foo.bar.baz", "_id"), "$$ROOT.fullDocument", ) diff --git a/internal/verifier/compare.go b/internal/verifier/compare.go index 7c9837dd..f85520c5 100644 --- a/internal/verifier/compare.go +++ b/internal/verifier/compare.go @@ -8,6 +8,7 @@ import ( "github.com/10gen/migration-verifier/chanutil" "github.com/10gen/migration-verifier/contextplus" + "github.com/10gen/migration-verifier/dockey" "github.com/10gen/migration-verifier/internal/reportutils" "github.com/10gen/migration-verifier/internal/retry" "github.com/10gen/migration-verifier/internal/types" @@ -631,8 +632,9 @@ func transformPipelineForToHashedIndexKey( slices.Clone(in), bson.D{{"$replaceWith", bson.D{ // Single-letter field names minimize the document size. - {docKeyInHashedCompare, extractDocKeyAgg( + {docKeyInHashedCompare, dockey.ExtractDocKeyAgg( task.QueryFilter.GetDocKeyFields(), + "$$ROOT", )}, {"h", bson.D{ {"$toHashedIndexKey", bson.D{ From b965033b24ee2190d2da137173a26694257e3b63 Mon Sep 17 00:00:00 2001 From: Felipe Gasper Date: Wed, 13 Aug 2025 13:02:57 -0400 Subject: [PATCH 05/32] bring in mongosync --- dockey/agg.go | 2 + dockey/agg_test.go | 56 + dockey/shardkey_pattern.go | 117 - dockey/unit_test.go | 112 - dockeys/README.md | 22 + dockey/dockey.go => dockeys/document_keys.go | 41 +- go.mod | 13 +- go.sum | 14 + internal/verifier/compare.go | 68 +- vendor/github.com/golang/snappy/README | 7 +- .../github.com/golang/snappy/encode_arm64.s | 4 +- .../github.com/klauspost/compress/.gitignore | 7 + .../klauspost/compress/.goreleaser.yml | 4 + .../github.com/klauspost/compress/README.md | 220 +- .../github.com/klauspost/compress/SECURITY.md | 25 + .../klauspost/compress/fse/compress.go | 31 +- .../klauspost/compress/fse/decompress.go | 4 +- .../klauspost/compress/huff0/bitreader.go | 130 +- .../klauspost/compress/huff0/bitwriter.go | 133 +- .../klauspost/compress/huff0/bytereader.go | 10 - .../klauspost/compress/huff0/compress.go | 127 +- .../klauspost/compress/huff0/decompress.go | 744 ++- .../compress/huff0/decompress_amd64.go | 226 + .../compress/huff0/decompress_amd64.s | 830 ++++ .../compress/huff0/decompress_generic.go | 299 ++ .../klauspost/compress/huff0/huff0.go | 2 + .../compress/internal/cpuinfo/cpuinfo.go | 34 + .../internal/cpuinfo/cpuinfo_amd64.go | 11 + .../compress/internal/cpuinfo/cpuinfo_amd64.s | 36 + .../compress/internal/snapref/encode_other.go | 38 +- .../klauspost/compress/zstd/README.md | 172 +- .../klauspost/compress/zstd/bitreader.go | 12 +- .../klauspost/compress/zstd/bitwriter.go | 98 +- .../klauspost/compress/zstd/blockdec.go | 526 +-- .../klauspost/compress/zstd/blockenc.go | 117 +- .../klauspost/compress/zstd/bytebuf.go | 27 +- .../klauspost/compress/zstd/bytereader.go | 6 - .../klauspost/compress/zstd/decodeheader.go | 93 +- .../klauspost/compress/zstd/decoder.go | 685 ++- .../compress/zstd/decoder_options.go | 103 +- .../klauspost/compress/zstd/dict.go | 51 +- .../klauspost/compress/zstd/enc_base.go | 17 +- .../klauspost/compress/zstd/enc_best.go | 264 +- .../klauspost/compress/zstd/enc_better.go | 43 +- .../klauspost/compress/zstd/enc_dfast.go | 35 +- .../klauspost/compress/zstd/enc_fast.go | 176 +- .../klauspost/compress/zstd/encoder.go | 151 +- .../compress/zstd/encoder_options.go | 55 +- .../klauspost/compress/zstd/framedec.go | 342 +- .../klauspost/compress/zstd/fse_decoder.go | 128 +- .../compress/zstd/fse_decoder_amd64.go | 65 + .../compress/zstd/fse_decoder_amd64.s | 126 + .../compress/zstd/fse_decoder_generic.go | 72 + .../klauspost/compress/zstd/fse_encoder.go | 28 +- .../klauspost/compress/zstd/hash.go | 6 - .../klauspost/compress/zstd/history.go | 67 +- .../compress/zstd/internal/xxhash/README.md | 49 +- .../compress/zstd/internal/xxhash/xxhash.go | 47 +- .../zstd/internal/xxhash/xxhash_amd64.go | 12 - .../zstd/internal/xxhash/xxhash_amd64.s | 337 +- .../zstd/internal/xxhash/xxhash_arm64.s | 184 + .../zstd/internal/xxhash/xxhash_asm.go | 16 + .../zstd/internal/xxhash/xxhash_other.go | 23 +- .../klauspost/compress/zstd/matchlen_amd64.go | 16 + .../klauspost/compress/zstd/matchlen_amd64.s | 68 + .../compress/zstd/matchlen_generic.go | 33 + .../klauspost/compress/zstd/seqdec.go | 328 +- .../klauspost/compress/zstd/seqdec_amd64.go | 394 ++ .../klauspost/compress/zstd/seqdec_amd64.s | 4175 +++++++++++++++++ .../klauspost/compress/zstd/seqdec_generic.go | 237 + .../github.com/klauspost/compress/zstd/zip.go | 69 +- .../klauspost/compress/zstd/zstd.go | 55 +- vendor/go.mongodb.org/mongo-driver/v2/LICENSE | 201 + .../mongo-driver/v2/bson/array_codec.go | 42 + .../mongo-driver/v2/bson/bsoncodec.go | 203 + .../mongo-driver/v2/bson/byte_slice_codec.go | 97 + .../mongo-driver/v2/bson/codec_cache.go | 166 + .../mongo-driver/v2/bson/cond_addr_codec.go | 61 + .../mongo-driver/v2/bson/copier.go | 431 ++ .../mongo-driver/v2/bson/decimal.go | 339 ++ .../mongo-driver/v2/bson/decoder.go | 136 + .../v2/bson/default_value_decoders.go | 1518 ++++++ .../v2/bson/default_value_encoders.go | 517 ++ .../mongo-driver/v2/bson/doc.go | 155 + .../v2/bson/empty_interface_codec.go | 127 + .../mongo-driver/v2/bson/encoder.go | 130 + .../mongo-driver/v2/bson/extjson_parser.go | 804 ++++ .../mongo-driver/v2/bson/extjson_reader.go | 606 +++ .../mongo-driver/v2/bson/extjson_tables.go | 223 + .../mongo-driver/v2/bson/extjson_wrappers.go | 489 ++ .../mongo-driver/v2/bson/extjson_writer.go | 690 +++ .../mongo-driver/v2/bson/json_scanner.go | 533 +++ .../mongo-driver/v2/bson/map_codec.go | 292 ++ .../mongo-driver/v2/bson/marshal.go | 191 + .../mongo-driver/v2/bson/mgoregistry.go | 209 + .../mongo-driver/v2/bson/mode.go | 82 + .../mongo-driver/v2/bson/objectid.go | 207 + .../mongo-driver/v2/bson/pointer_codec.go | 88 + .../mongo-driver/v2/bson/primitive.go | 379 ++ .../mongo-driver/v2/bson/primitive_codecs.go | 89 + .../mongo-driver/v2/bson/raw.go | 93 + .../mongo-driver/v2/bson/raw_array.go | 73 + .../mongo-driver/v2/bson/raw_element.go | 48 + .../mongo-driver/v2/bson/raw_value.go | 306 ++ .../mongo-driver/v2/bson/reader.go | 49 + .../mongo-driver/v2/bson/registry.go | 381 ++ .../mongo-driver/v2/bson/slice_codec.go | 173 + .../mongo-driver/v2/bson/string_codec.go | 107 + .../mongo-driver/v2/bson/struct_codec.go | 695 +++ .../mongo-driver/v2/bson/struct_tag_parser.go | 123 + .../mongo-driver/v2/bson/time_codec.go | 109 + .../mongo-driver/v2/bson/types.go | 118 + .../mongo-driver/v2/bson/uint_codec.go | 161 + .../mongo-driver/v2/bson/unmarshal.go | 90 + .../mongo-driver/v2/bson/value_reader.go | 898 ++++ .../mongo-driver/v2/bson/value_writer.go | 600 +++ .../mongo-driver/v2/bson/vector.go | 268 ++ .../mongo-driver/v2/bson/writer.go | 61 + .../v2/internal/bsoncoreutil/bsoncoreutil.go | 40 + .../v2/internal/decimal128/decinal128.go | 117 + .../mongo-driver/v2/x/bsonx/bsoncore/array.go | 185 + .../v2/x/bsonx/bsoncore/bson_arraybuilder.go | 198 + .../x/bsonx/bsoncore/bson_documentbuilder.go | 184 + .../v2/x/bsonx/bsoncore/bsoncore.go | 842 ++++ .../mongo-driver/v2/x/bsonx/bsoncore/doc.go | 34 + .../v2/x/bsonx/bsoncore/document.go | 411 ++ .../v2/x/bsonx/bsoncore/element.go | 166 + .../v2/x/bsonx/bsoncore/iterator.go | 113 + .../v2/x/bsonx/bsoncore/tables.go | 223 + .../mongo-driver/v2/x/bsonx/bsoncore/type.go | 85 + .../mongo-driver/v2/x/bsonx/bsoncore/value.go | 986 ++++ vendor/golang.org/x/crypto/sha3/doc.go | 4 + vendor/golang.org/x/crypto/sha3/hashes.go | 31 +- vendor/golang.org/x/crypto/sha3/sha3.go | 187 +- vendor/golang.org/x/crypto/sha3/shake.go | 89 +- vendor/golang.org/x/crypto/sha3/xor.go | 40 - vendor/golang.org/x/sync/errgroup/errgroup.go | 1 + .../golang.org/x/sys/cpu/asm_darwin_x86_gc.s | 17 + vendor/golang.org/x/sys/cpu/cpu.go | 22 + vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go | 61 + vendor/golang.org/x/sys/cpu/cpu_gc_x86.go | 4 +- .../x/sys/cpu/{cpu_x86.s => cpu_gc_x86.s} | 2 +- vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go | 6 - .../golang.org/x/sys/cpu/cpu_linux_arm64.go | 1 - .../golang.org/x/sys/cpu/cpu_linux_noinit.go | 2 +- .../golang.org/x/sys/cpu/cpu_linux_riscv64.go | 137 + vendor/golang.org/x/sys/cpu/cpu_other_x86.go | 11 + vendor/golang.org/x/sys/cpu/cpu_riscv64.go | 11 +- vendor/golang.org/x/sys/cpu/cpu_x86.go | 27 +- .../x/sys/cpu/syscall_darwin_x86_gc.go | 98 + vendor/golang.org/x/sys/unix/README.md | 2 +- vendor/golang.org/x/sys/unix/auxv.go | 36 + .../golang.org/x/sys/unix/auxv_unsupported.go | 13 + vendor/golang.org/x/sys/unix/ioctl_linux.go | 96 + vendor/golang.org/x/sys/unix/mkerrors.sh | 17 +- vendor/golang.org/x/sys/unix/syscall_aix.go | 2 +- .../golang.org/x/sys/unix/syscall_darwin.go | 37 + .../x/sys/unix/syscall_dragonfly.go | 12 + vendor/golang.org/x/sys/unix/syscall_hurd.go | 1 + vendor/golang.org/x/sys/unix/syscall_linux.go | 64 +- .../x/sys/unix/syscall_linux_arm64.go | 2 + .../x/sys/unix/syscall_linux_loong64.go | 2 + .../x/sys/unix/syscall_linux_riscv64.go | 2 + .../golang.org/x/sys/unix/syscall_solaris.go | 87 + .../x/sys/unix/syscall_zos_s390x.go | 104 +- .../golang.org/x/sys/unix/vgetrandom_linux.go | 13 + .../x/sys/unix/vgetrandom_unsupported.go | 11 + .../x/sys/unix/zerrors_darwin_amd64.go | 7 + .../x/sys/unix/zerrors_darwin_arm64.go | 7 + vendor/golang.org/x/sys/unix/zerrors_linux.go | 64 +- .../x/sys/unix/zerrors_linux_386.go | 28 + .../x/sys/unix/zerrors_linux_amd64.go | 28 + .../x/sys/unix/zerrors_linux_arm.go | 28 + .../x/sys/unix/zerrors_linux_arm64.go | 30 + .../x/sys/unix/zerrors_linux_loong64.go | 28 + .../x/sys/unix/zerrors_linux_mips.go | 28 + .../x/sys/unix/zerrors_linux_mips64.go | 28 + .../x/sys/unix/zerrors_linux_mips64le.go | 28 + .../x/sys/unix/zerrors_linux_mipsle.go | 28 + .../x/sys/unix/zerrors_linux_ppc.go | 28 + .../x/sys/unix/zerrors_linux_ppc64.go | 28 + .../x/sys/unix/zerrors_linux_ppc64le.go | 28 + .../x/sys/unix/zerrors_linux_riscv64.go | 28 + .../x/sys/unix/zerrors_linux_s390x.go | 28 + .../x/sys/unix/zerrors_linux_sparc64.go | 28 + .../x/sys/unix/zerrors_zos_s390x.go | 2 + .../x/sys/unix/zsyscall_darwin_amd64.go | 20 + .../x/sys/unix/zsyscall_darwin_amd64.s | 5 + .../x/sys/unix/zsyscall_darwin_arm64.go | 20 + .../x/sys/unix/zsyscall_darwin_arm64.s | 5 + .../golang.org/x/sys/unix/zsyscall_linux.go | 27 +- .../x/sys/unix/zsyscall_solaris_amd64.go | 114 + .../x/sys/unix/zsysnum_linux_386.go | 4 + .../x/sys/unix/zsysnum_linux_amd64.go | 5 + .../x/sys/unix/zsysnum_linux_arm.go | 4 + .../x/sys/unix/zsysnum_linux_arm64.go | 6 +- .../x/sys/unix/zsysnum_linux_loong64.go | 6 + .../x/sys/unix/zsysnum_linux_mips.go | 4 + .../x/sys/unix/zsysnum_linux_mips64.go | 4 + .../x/sys/unix/zsysnum_linux_mips64le.go | 4 + .../x/sys/unix/zsysnum_linux_mipsle.go | 4 + .../x/sys/unix/zsysnum_linux_ppc.go | 4 + .../x/sys/unix/zsysnum_linux_ppc64.go | 4 + .../x/sys/unix/zsysnum_linux_ppc64le.go | 4 + .../x/sys/unix/zsysnum_linux_riscv64.go | 6 +- .../x/sys/unix/zsysnum_linux_s390x.go | 4 + .../x/sys/unix/zsysnum_linux_sparc64.go | 4 + .../x/sys/unix/ztypes_darwin_amd64.go | 73 + .../x/sys/unix/ztypes_darwin_arm64.go | 73 + .../x/sys/unix/ztypes_freebsd_386.go | 1 + .../x/sys/unix/ztypes_freebsd_amd64.go | 1 + .../x/sys/unix/ztypes_freebsd_arm.go | 1 + .../x/sys/unix/ztypes_freebsd_arm64.go | 1 + .../x/sys/unix/ztypes_freebsd_riscv64.go | 1 + vendor/golang.org/x/sys/unix/ztypes_linux.go | 231 +- .../x/sys/unix/ztypes_linux_riscv64.go | 33 + .../golang.org/x/sys/unix/ztypes_zos_s390x.go | 6 + vendor/modules.txt | 21 +- 218 files changed, 28976 insertions(+), 3295 deletions(-) delete mode 100644 dockey/shardkey_pattern.go delete mode 100644 dockey/unit_test.go create mode 100644 dockeys/README.md rename dockey/dockey.go => dockeys/document_keys.go (71%) create mode 100644 vendor/github.com/klauspost/compress/SECURITY.md create mode 100644 vendor/github.com/klauspost/compress/huff0/decompress_amd64.go create mode 100644 vendor/github.com/klauspost/compress/huff0/decompress_amd64.s create mode 100644 vendor/github.com/klauspost/compress/huff0/decompress_generic.go create mode 100644 vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go create mode 100644 vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go create mode 100644 vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s create mode 100644 vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go create mode 100644 vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s create mode 100644 vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go delete mode 100644 vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go create mode 100644 vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s create mode 100644 vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go create mode 100644 vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go create mode 100644 vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s create mode 100644 vendor/github.com/klauspost/compress/zstd/matchlen_generic.go create mode 100644 vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go create mode 100644 vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s create mode 100644 vendor/github.com/klauspost/compress/zstd/seqdec_generic.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/LICENSE create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/array_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/bsoncodec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/byte_slice_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/codec_cache.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/cond_addr_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/copier.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/decimal.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/decoder.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_decoders.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_encoders.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/doc.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/empty_interface_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/encoder.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_parser.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_reader.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_tables.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_wrappers.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_writer.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/json_scanner.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/map_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/marshal.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/mgoregistry.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/mode.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/objectid.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/pointer_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/primitive.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/primitive_codecs.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/raw.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/raw_array.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/raw_element.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/raw_value.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/reader.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/registry.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/slice_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/string_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/struct_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/struct_tag_parser.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/time_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/types.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/uint_codec.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/unmarshal.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/value_reader.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/value_writer.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/vector.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/bson/writer.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/internal/bsoncoreutil/bsoncoreutil.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/internal/decimal128/decinal128.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/array.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/bson_arraybuilder.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/bson_documentbuilder.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/bsoncore.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/doc.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/document.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/element.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/iterator.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/tables.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/type.go create mode 100644 vendor/go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore/value.go delete mode 100644 vendor/golang.org/x/crypto/sha3/xor.go create mode 100644 vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s create mode 100644 vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go rename vendor/golang.org/x/sys/cpu/{cpu_x86.s => cpu_gc_x86.s} (94%) create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_x86.go create mode 100644 vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go create mode 100644 vendor/golang.org/x/sys/unix/auxv.go create mode 100644 vendor/golang.org/x/sys/unix/auxv_unsupported.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_linux.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go diff --git a/dockey/agg.go b/dockey/agg.go index 8e0a3fb8..dc62fe8e 100644 --- a/dockey/agg.go +++ b/dockey/agg.go @@ -1,3 +1,5 @@ +// Package dockey contains logic related to document key determination. + package dockey import ( diff --git a/dockey/agg_test.go b/dockey/agg_test.go index 21ffe35b..a95385fc 100644 --- a/dockey/agg_test.go +++ b/dockey/agg_test.go @@ -14,6 +14,62 @@ import ( "go.mongodb.org/mongo-driver/mongo/options" ) +var testCases = []struct { + doc bson.D + docKey bson.D +}{ + { + doc: bson.D{ + {"foo", bson.D{ + {"bar", bson.D{{"baz", 1}}}, + {"bar.baz", 2}, + }}, + {"foo.bar", bson.D{{"baz", 3}}}, + {"foo.bar.baz", 4}, + }, + docKey: bson.D{ + {"foo.bar.baz", int32(4)}, + }, + }, + { + doc: bson.D{ + {"foo", bson.D{ + {"bar", bson.D{{"baz", 1}}}, + {"bar.baz", 2}, + }}, + {"foo.bar", bson.D{{"baz", 3}}}, + }, + docKey: bson.D{ + {"foo.bar.baz", int32(2)}, + }, + }, + { + doc: bson.D{ + {"foo", bson.D{ + {"bar", bson.D{{"baz", 1}}}, + }}, + {"foo.bar", bson.D{{"baz", 3}}}, + }, + docKey: bson.D{ + {"foo.bar.baz", int32(1)}, + }, + }, + { + doc: bson.D{ + {"foo.bar", bson.D{{"baz", 3}}}, + }, + docKey: bson.D{}, // _id only + }, + { + doc: bson.D{ + {"foo", bson.D{{"bar.baz", nil}}}, + }, + docKey: bson.D{ + {"foo.bar.baz", nil}, + }, + }, +} + func TestExtractDocKeyAgg(t *testing.T) { ctx := t.Context() diff --git a/dockey/shardkey_pattern.go b/dockey/shardkey_pattern.go deleted file mode 100644 index 2b0d4b24..00000000 --- a/dockey/shardkey_pattern.go +++ /dev/null @@ -1,117 +0,0 @@ -package dockey - -import ( - "fmt" - - "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson" -) - -// Pattern represents a collection shard key (the thing you'd pass to -// shardCollection). It's called a "pattern" to distinguish the collection's -// shard key from a document's shard key. The zero value for Pattern -// is useful by itself: it behaves as though a collection has no shard key at -// all. -type Pattern struct { - raw bson.Raw - hasID bool -} - -// EmptyPattern is an empty pattern; it can be used to represent unsharded -// collections. -var EmptyPattern Pattern - -// NewPattern takes a raw document, like the thing you'd find in -// config.collections. It validates that the shard key is valid BSON, and -// returns a Pattern. -func NewPattern(raw bson.Raw) (Pattern, error) { - if len(raw) == 0 { - return Pattern{}, nil - } - - err := raw.Validate() - if err != nil { - return Pattern{}, errors.Wrap(err, "failed to validate shard key pattern") - } - - _, lookupErr := raw.LookupErr("_id") - hasID := lookupErr == nil - - return Pattern{raw, hasID}, nil -} - -// PatternFromMarshalable takes a marshalable object (like a bson.D) and -// returns a new Pattern. This function panics on any error. It's mostly here -// because it's convenient to use in tests, where you know you'll have a valid -// pattern. -func PatternFromMarshalable(key any) Pattern { - raw, err := bson.Marshal(key) - if err != nil { - panic(err) - } - - p, err := NewPattern(raw) - if err != nil { - panic(err) - } - - return p -} - -// ContainsID returns true if the shard key pattern contains the _id field. -func (p Pattern) ContainsID() bool { - return p.hasID -} - -// IsEmpty returns true if this is a zero-value pattern. -func (p Pattern) IsEmpty() bool { - return len(p.raw) == 0 -} - -// Elements returns a slice of bson.RawElement values for every element of the -// shard key. It works like the bson.Raw.Elements method, but does not return -// an error (because we validate the underlying raw document in the -// constructor). -func (p Pattern) Elements() []bson.RawElement { - elems, err := p.raw.Elements() - - // We've validated this in the constructor, so if this happens then someone - // has done something nasty. - if err != nil { - panic( - fmt.Sprintf("got error calling raw.Elements() on a validated raw: %s", err), - ) - } - - return elems -} - -// String returns a JSON representation of the pattern. -func (p Pattern) String() string { - return p.raw.String() -} - -// MarshalBSON implements the bson.Marshaler interface. -func (p Pattern) MarshalBSON() ([]byte, error) { - // We just need to stash the raw here; we can fill back in hasID when we - // unmarshal. - return bson.Marshal(p.raw) -} - -// MarshalBSON implements the bson.Unmarshaler interface. -func (p *Pattern) UnmarshalBSON(data []byte) error { - var raw bson.Raw - err := bson.Unmarshal(data, &raw) - if err != nil { - return err - } - - dupe, err := NewPattern(raw) - if err != nil { - return err - } - - p.raw = dupe.raw - p.hasID = dupe.hasID - return nil -} diff --git a/dockey/unit_test.go b/dockey/unit_test.go deleted file mode 100644 index 7c9a435b..00000000 --- a/dockey/unit_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package dockey - -import ( - "testing" - - "github.com/stretchr/testify/suite" - "go.mongodb.org/mongo-driver/bson" -) - -type UnitTestSuite struct { - suite.Suite -} - -func TestUnitTestSuite(t *testing.T) { - ts := new(UnitTestSuite) - suite.Run(t, ts) -} - -var testCases = []struct { - doc bson.D - docKey bson.D -}{ - { - doc: bson.D{ - {"foo", bson.D{ - {"bar", bson.D{{"baz", 1}}}, - {"bar.baz", 2}, - }}, - {"foo.bar", bson.D{{"baz", 3}}}, - {"foo.bar.baz", 4}, - }, - docKey: bson.D{ - {"foo.bar.baz", int32(4)}, - }, - }, - { - doc: bson.D{ - {"foo", bson.D{ - {"bar", bson.D{{"baz", 1}}}, - {"bar.baz", 2}, - }}, - {"foo.bar", bson.D{{"baz", 3}}}, - }, - docKey: bson.D{ - {"foo.bar.baz", int32(2)}, - }, - }, - { - doc: bson.D{ - {"foo", bson.D{ - {"bar", bson.D{{"baz", 1}}}, - }}, - {"foo.bar", bson.D{{"baz", 3}}}, - }, - docKey: bson.D{ - {"foo.bar.baz", int32(1)}, - }, - }, - { - doc: bson.D{ - {"foo.bar", bson.D{{"baz", 3}}}, - }, - docKey: bson.D{}, // _id only - }, - { - doc: bson.D{ - {"foo", bson.D{{"bar.baz", nil}}}, - }, - docKey: bson.D{ - {"foo.bar.baz", nil}, - }, - }, -} - -func (s *UnitTestSuite) TestExtractDocumentKey() { - patternRaw, err := bson.Marshal(bson.D{ - {"_id", 1}, - {"foo.bar.baz", 1}, - }) - s.Require().NoError(err) - - pattern, err := NewPattern(patternRaw) - s.Require().NoError(err) - - for _, curCase := range testCases { - doc, err := bson.Marshal( - append( - bson.D{{"_id", true}}, - curCase.doc..., - ), - ) - s.Require().NoError(err) - - docKeyRaw, err := ExtractDocumentKey(pattern, doc) - s.Require().NoError(err) - - var docKey bson.D - s.Require().NoError(bson.Unmarshal(docKeyRaw, &docKey)) - - s.Assert().Equal( - append( - bson.D{{"_id", true}}, - curCase.docKey..., - ), - docKey, - "doc key for %v should be %v", - curCase.doc, - curCase.docKey, - ) - } - -} diff --git a/dockeys/README.md b/dockeys/README.md new file mode 100644 index 00000000..49f8d4a1 --- /dev/null +++ b/dockeys/README.md @@ -0,0 +1,22 @@ +# The `dockeys` package + +This package is responsible for computing document keys from full documents. A _document key_ +uniquely identifies a document: for replica sets, the document key is always just the `_id`, and in +sharded clusters, it is the `_id` + shard key. The verifier uses document keys to key the maps in +the aggregator: we need to do this because keying by `_id` is not sufficient, since the same `_id` +can exist on multiple shards with different shard keys. (This is not merely academic: mongosync +creates these transient duplicate `_id`s as part of its normal operation.) + +The logic in this package duplicates the server's logic for generating document keys. Ideally we +_wouldn't_ do this, and could rely on the server to generate them for us in all cases, but right now +there is no way to do that. (The verifier must generate document keys from documents at rest during +the initial collection scan.) + +To generate the document key for a document, pass the shard key pattern and the full document to +`ExtractDocumentKey`. That function will first iterate through the shard key and extract all of the +relevant field values from the document, if they are present. If the `_id` is not a part of the +shard key pattern, it also appends the `_id` field. + +**Important:** the tests for this package are in `verifier/dockeys_test`. They use all of the +verifier's test abstractions, and so it's easier for now to keep them there. (When mongosync and the +verifier share more test abstractions, it might be useful to move them.) diff --git a/dockey/dockey.go b/dockeys/document_keys.go similarity index 71% rename from dockey/dockey.go rename to dockeys/document_keys.go index 2b0d215e..b3dba3d4 100644 --- a/dockey/dockey.go +++ b/dockeys/document_keys.go @@ -1,12 +1,16 @@ -package dockey +// Package dockeys was copied from mongosync@5b99bac58405b and tweaked +// not to depend on mongosync’s shardkeys package but instead just to take +// a slice of field names. +package dockeys import ( "fmt" + "slices" "strings" "github.com/pkg/errors" - "go.mongodb.org/mongo-driver/bson" - "go.mongodb.org/mongo-driver/x/bsonx/bsoncore" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore" ) // DocumentKey represents a document key from the change stream. @@ -18,41 +22,44 @@ func NewDocumentKey(raw bson.Raw) DocumentKey { return DocumentKey(raw) } -// ExtractDocumentKey takes a shard key pattern and a document and returns its +// ExtractDocumentKey takes a slice of shard key fields and a document and returns its // DocumentKey. This is a reimplementation of the same method in the server, // so that we can reliably generate document keys even without a change // stream. -func ExtractDocumentKey(pattern Pattern, input bson.Raw) (DocumentKey, error) { - builder, err := extractShardKey(pattern, input) +func ExtractDocumentKey(fields []string, inputBytes []byte) (DocumentKey, error) { + input := bson.Raw(inputBytes) + + builder, err := extractShardKey(fields, input) if err != nil { return nil, err } - if !pattern.ContainsID() { + if !slices.Contains(fields, "_id") { id, err := input.LookupErr("_id") if err != nil { return nil, errors.Wrap(err, "failed to get document _id") } - val := bsoncore.Value{Type: id.Type, Data: id.Value} + val := bsoncore.Value{Type: bsoncore.Type(id.Type), Data: id.Value} builder.AppendValue("_id", val) } return DocumentKey(builder.Build()), nil } -// extractShardKey does the shard-key part of the document key extraction. -func extractShardKey(pattern Pattern, input bson.Raw) (*bsoncore.DocumentBuilder, error) { +// extractShardKey does the shard-key part of the document key extraction. This code is a +// reimplementation of the server's extractElementsBasedOnTemplate function (in +// dotted_path_support.cpp), which is called from getDocumentKey in op_observer_util.cpp (which is +// used by the change stream to generate document keys). +func extractShardKey(fields []string, input bson.Raw) (*bsoncore.DocumentBuilder, error) { db := bsoncore.NewDocumentBuilder() - if pattern.IsEmpty() { + if len(fields) == 0 { return db, nil } // For every element of the shard key pattern: - for _, e := range pattern.Elements() { - key := e.Key() - + for _, key := range fields { rv, err := extractElementAtDottedPath(key, input) switch { @@ -70,7 +77,7 @@ func extractShardKey(pattern Pattern, input bson.Raw) (*bsoncore.DocumentBuilder default: // We found the element; add it to the result. - val := bsoncore.Value{Type: rv.Type, Data: rv.Value} + val := bsoncore.Value{Type: bsoncore.Type(rv.Type), Data: rv.Value} db.AppendValue(key, val) } } @@ -79,6 +86,8 @@ func extractShardKey(pattern Pattern, input bson.Raw) (*bsoncore.DocumentBuilder return db, nil } +// This code is a reimplementation of the server's extractElementAtDottedPath function in +// dotted_path_support.cpp. func extractElementAtDottedPath(path string, input bson.Raw) (bson.RawValue, error) { // First, try to look up the field verbatim. We must do this because field // names can have dots in them: a shard key pattern like {"a.b": 1} will @@ -102,7 +111,7 @@ func extractElementAtDottedPath(path string, input bson.Raw) (bson.RawValue, err if !ok { return rv, bsoncore.InvalidDepthTraversalError{ Key: left, - Type: el.Type, + Type: bsoncore.Type(el.Type), } } diff --git a/go.mod b/go.mod index 7edebef8..0b24d036 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/urfave/cli v1.22.9 go.mongodb.org/mongo-driver v1.17.1 golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 - golang.org/x/sync v0.8.0 + golang.org/x/sync v0.11.0 gopkg.in/natefinch/lumberjack.v2 v2.0.0 ) @@ -32,10 +32,10 @@ require ( github.com/go-playground/universal-translator v0.18.0 // indirect github.com/go-playground/validator/v10 v10.10.0 // indirect github.com/goccy/go-json v0.9.7 // indirect - github.com/golang/snappy v0.0.4 // indirect + github.com/golang/snappy v1.0.0 // indirect github.com/huandu/go-clone v1.6.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.13.6 // indirect + github.com/klauspost/compress v1.16.7 // indirect github.com/leodido/go-urn v1.2.1 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.19 // indirect @@ -51,10 +51,11 @@ require ( github.com/xdg-go/scram v1.1.2 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect - golang.org/x/crypto v0.26.0 // indirect + go.mongodb.org/mongo-driver/v2 v2.2.3 // indirect + golang.org/x/crypto v0.33.0 // indirect golang.org/x/net v0.21.0 // indirect - golang.org/x/sys v0.23.0 // indirect - golang.org/x/text v0.17.0 // indirect + golang.org/x/sys v0.30.0 // indirect + golang.org/x/text v0.22.0 // indirect google.golang.org/protobuf v1.28.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 918591a6..dabb3b87 100644 --- a/go.sum +++ b/go.sum @@ -32,6 +32,8 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= +github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -48,6 +50,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= @@ -121,11 +125,15 @@ github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfS github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.mongodb.org/mongo-driver v1.17.1 h1:Wic5cJIwJgSpBhe3lx3+/RybR5PiYRMpVFgO7cOHyIM= go.mongodb.org/mongo-driver v1.17.1/go.mod h1:wwWm/+BuOddhcq3n68LKRmgk2wXzmF6s0SFOa0GINL4= +go.mongodb.org/mongo-driver/v2 v2.2.3 h1:72uiGYXeSnUEQk37xvV9r067xzFQod4SOeAoOuq3+GM= +go.mongodb.org/mongo-driver/v2 v2.2.3/go.mod h1:qQkDMhCGWl3FN509DfdPd4GRBLU/41zqF/k8eTRceps= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= @@ -138,6 +146,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -149,6 +159,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -158,6 +170,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= diff --git a/internal/verifier/compare.go b/internal/verifier/compare.go index f85520c5..81081e6f 100644 --- a/internal/verifier/compare.go +++ b/internal/verifier/compare.go @@ -9,6 +9,7 @@ import ( "github.com/10gen/migration-verifier/chanutil" "github.com/10gen/migration-verifier/contextplus" "github.com/10gen/migration-verifier/dockey" + "github.com/10gen/migration-verifier/dockeys" "github.com/10gen/migration-verifier/internal/reportutils" "github.com/10gen/migration-verifier/internal/retry" "github.com/10gen/migration-verifier/internal/types" @@ -127,16 +128,15 @@ func (verifier *Verifier) compareDocsFromChannels( // b) compares the new doc against its previously-received, cached // counterpart and records any mismatch. handleNewDoc := func(curDocWithTs docWithTs, isSrc bool) error { - docKeyValues := lo.Map( + docKeyValues, err := getDocKeyValues( + verifier.docCompareMethod, + curDocWithTs.doc, mapKeyFieldNames, - func(fieldName string, _ int) bson.RawValue { - return getDocKeyFieldFromComparison( - verifier.docCompareMethod, - curDocWithTs.doc, - fieldName, - ) - }, ) + if err != nil { + return errors.Wrapf(err, "extracting doc key (fields: %v) values from doc %+v", mapKeyFieldNames, curDocWithTs.doc) + } + mapKey := getMapKey(docKeyValues) var ourMap, theirMap map[string]docWithTs @@ -372,6 +372,58 @@ func getDocKeyFieldFromComparison( } } +func getDocKeyValues( + docCompareMethod DocCompareMethod, + doc bson.Raw, + fieldNames []string, +) ([]bson.RawValue, error) { + var docKey bson.Raw + + switch docCompareMethod { + case DocCompareBinary, DocCompareIgnoreOrder: + dk, err := dockeys.ExtractDocumentKey(fieldNames, doc) + if err != nil { + return nil, errors.Wrapf(err, "extracting doc key (fields: %v) from doc %+v", fieldNames, doc) + } + + docKey = bson.Raw(dk.Bytes()) + case DocCompareToHashedIndexKey: + docKeyVal, err := doc.LookupErr(docKeyInHashedCompare) + if err != nil { + return nil, errors.Wrapf(err, "fetching %#q from doc %v", docKeyInHashedCompare, doc) + } + + var isDoc bool + docKey, isDoc = docKeyVal.DocumentOK() + if !isDoc { + return nil, fmt.Errorf( + "%#q in doc %v is type %s but should be %s", + docKeyInHashedCompare, + doc, + docKeyVal.Type, + bson.TypeEmbeddedDocument, + ) + } + } + + var values []bson.RawValue + els, err := docKey.Elements() + if err != nil { + return nil, errors.Wrapf(err, "parsing doc key (%+v) of doc %+v", docKey, doc) + } + + for _, el := range els { + val, err := el.ValueErr() + if err != nil { + return nil, errors.Wrapf(err, "parsing doc key element (%+v) of doc %+v", el, doc) + } + + values = append(values, val) + } + + return values, nil +} + func simpleTimerReset(t *time.Timer, dur time.Duration) { if !t.Stop() { <-t.C diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README index cea12879..fd191f78 100644 --- a/vendor/github.com/golang/snappy/README +++ b/vendor/github.com/golang/snappy/README @@ -1,8 +1,13 @@ The Snappy compression format in the Go programming language. -To download and install from source: +To use as a library: $ go get github.com/golang/snappy +To use as a binary: +$ go install github.com/golang/snappy/cmd/snappytool@latest +$ cat decoded | ~/go/bin/snappytool -e > encoded +$ cat encoded | ~/go/bin/snappytool -d > decoded + Unless otherwise noted, the Snappy-Go source files are distributed under the BSD-style license found in the LICENSE file. diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s index f8d54adf..f0c876a2 100644 --- a/vendor/github.com/golang/snappy/encode_arm64.s +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -27,7 +27,7 @@ // The unusual register allocation of local variables, such as R10 for the // source pointer, matches the allocation used at the call site in encodeBlock, // which makes it easier to manually inline this function. -TEXT ·emitLiteral(SB), NOSPLIT, $32-56 +TEXT ·emitLiteral(SB), NOSPLIT, $40-56 MOVD dst_base+0(FP), R8 MOVD lit_base+24(FP), R10 MOVD lit_len+32(FP), R3 @@ -261,7 +261,7 @@ extendMatchEnd: // "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An // extra 64 bytes, to call other functions, and an extra 64 bytes, to spill // local variables (registers) during calls gives 32768 + 64 + 64 = 32896. -TEXT ·encodeBlock(SB), 0, $32896-56 +TEXT ·encodeBlock(SB), 0, $32904-56 MOVD dst_base+0(FP), R8 MOVD src_base+24(FP), R7 MOVD src_len+32(FP), R14 diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore index b35f8449..d31b3781 100644 --- a/vendor/github.com/klauspost/compress/.gitignore +++ b/vendor/github.com/klauspost/compress/.gitignore @@ -23,3 +23,10 @@ _testmain.go *.test *.prof /s2/cmd/_s2sx/sfx-exe + +# Linux perf files +perf.data +perf.data.old + +# gdb history +.gdb_history diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index c9014ce1..7a008a4d 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -3,6 +3,7 @@ before: hooks: - ./gen.sh + - go install mvdan.cc/garble@v0.9.3 builds: - @@ -31,6 +32,7 @@ builds: - mips64le goarm: - 7 + gobinary: garble - id: "s2d" binary: s2d @@ -57,6 +59,7 @@ builds: - mips64le goarm: - 7 + gobinary: garble - id: "s2sx" binary: s2sx @@ -84,6 +87,7 @@ builds: - mips64le goarm: - 7 + gobinary: garble archives: - diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 3429879e..4002a16a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -9,7 +9,6 @@ This package provides various compression algorithms. * [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding. * [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently. * [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation. -* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here. [![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories) [![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml) @@ -17,6 +16,199 @@ This package provides various compression algorithms. # changelog +* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6) + * zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806 + * zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824 + * gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815 + * s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663 + +* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5) + * zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802 + * gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804 + +* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4) + * zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784 + * zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792 + * zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785 + * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 + * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 + * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 + * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 + +* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) + * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 + * gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767 + * s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766 + * zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773 + * huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774 + +* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0) + * s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support. https://github.com/klauspost/compress/pull/685 + * s2: Add Compression Size Estimate. https://github.com/klauspost/compress/pull/752 + * s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755 + * s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748 + * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747 + * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746 + +* Jan 21st, 2023 (v1.15.15) + * deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739 + * zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728 + * zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745 + * gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740 + +* Jan 3rd, 2023 (v1.15.14) + + * flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718 + * zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720 + * export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722 + * s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723 + +* Dec 11, 2022 (v1.15.13) + * zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder https://github.com/klauspost/compress/pull/691 + * zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708 + +* Oct 26, 2022 (v1.15.12) + + * zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680 + * gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683 + +* Sept 26, 2022 (v1.15.11) + + * flate: Improve level 1-3 compression https://github.com/klauspost/compress/pull/678 + * zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677 + * zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668 + * zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667 + +* Sept 16, 2022 (v1.15.10) + + * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 + * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 + * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 + * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657 + * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 + * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 + * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 + * Use arrays for constant size copies https://github.com/klauspost/compress/pull/659 + +* July 21, 2022 (v1.15.9) + + * zstd: Fix decoder crash on amd64 (no BMI) on invalid input https://github.com/klauspost/compress/pull/645 + * zstd: Disable decoder extended memory copies (amd64) due to possible crashes https://github.com/klauspost/compress/pull/644 + * zstd: Allow single segments up to "max decoded size" by @klauspost in https://github.com/klauspost/compress/pull/643 + +* July 13, 2022 (v1.15.8) + + * gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641 + * s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638 + * zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636 + * zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637 + * huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634 + * zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640 + * gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639 + +* June 29, 2022 (v1.15.7) + + * s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633 + * zip: Merge upstream https://github.com/klauspost/compress/pull/631 + * zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624 + * zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598 + * flate: Faster histograms https://github.com/klauspost/compress/pull/620 + * deflate: Use compound hcode https://github.com/klauspost/compress/pull/622 + +* June 3, 2022 (v1.15.6) + * s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613 + * s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611 + * zstd: Always use configured block size https://github.com/klauspost/compress/pull/605 + * zstd: Fix incorrect hash table placement for dict encoding in default https://github.com/klauspost/compress/pull/606 + * zstd: Apply default config to ZipDecompressor without options https://github.com/klauspost/compress/pull/608 + * gzhttp: Exclude more common archive formats https://github.com/klauspost/compress/pull/612 + * s2: Add ReaderIgnoreCRC https://github.com/klauspost/compress/pull/609 + * s2: Remove sanity load on index creation https://github.com/klauspost/compress/pull/607 + * snappy: Use dedicated function for scoring https://github.com/klauspost/compress/pull/614 + * s2c+s2d: Use official snappy framed extension https://github.com/klauspost/compress/pull/610 + +* May 25, 2022 (v1.15.5) + * s2: Add concurrent stream decompression https://github.com/klauspost/compress/pull/602 + * s2: Fix final emit oob read crash on amd64 https://github.com/klauspost/compress/pull/601 + * huff0: asm implementation of Decompress1X by @WojciechMula https://github.com/klauspost/compress/pull/596 + * zstd: Use 1 less goroutine for stream decoding https://github.com/klauspost/compress/pull/588 + * zstd: Copy literal in 16 byte blocks when possible https://github.com/klauspost/compress/pull/592 + * zstd: Speed up when WithDecoderLowmem(false) https://github.com/klauspost/compress/pull/599 + * zstd: faster next state update in BMI2 version of decode by @WojciechMula in https://github.com/klauspost/compress/pull/593 + * huff0: Do not check max size when reading table. https://github.com/klauspost/compress/pull/586 + * flate: Inplace hashing for level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/590 + + +* May 11, 2022 (v1.15.4) + * huff0: decompress directly into output by @WojciechMula in [#577](https://github.com/klauspost/compress/pull/577) + * inflate: Keep dict on stack [#581](https://github.com/klauspost/compress/pull/581) + * zstd: Faster decoding memcopy in asm [#583](https://github.com/klauspost/compress/pull/583) + * zstd: Fix ignored crc [#580](https://github.com/klauspost/compress/pull/580) + +* May 5, 2022 (v1.15.3) + * zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572) + * s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575) + +* Apr 26, 2022 (v1.15.2) + * zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster. [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537) + * zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539) + * s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555) + * Minimum version is Go 1.16, added CI test on 1.18. + +* Mar 11, 2022 (v1.15.1) + * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512) + * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514) + * zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520) + * zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521) + * zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523) + +* Mar 3, 2022 (v1.15.0) + * zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498) + * zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505) + * huff0: Prevent single blocks exceeding 16 bits by @klauspost in[#507](https://github.com/klauspost/compress/pull/507) + * flate: Inline literal emission by @klauspost in [#509](https://github.com/klauspost/compress/pull/509) + * gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400) + * gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510) + +Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines. + +Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected. + +While the release has been extensively tested, it is recommended to testing when upgrading. + +
+ See changes to v1.14.x + +* Feb 22, 2022 (v1.14.4) + * flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503) + * zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502) + * zip: don't read data descriptor early by @saracen in [#501](https://github.com/klauspost/compress/pull/501) #501 + * huff0: Use static decompression buffer up to 30% faster by @klauspost in [#499](https://github.com/klauspost/compress/pull/499) [#500](https://github.com/klauspost/compress/pull/500) + +* Feb 17, 2022 (v1.14.3) + * flate: Improve fastest levels compression speed ~10% more throughput. [#482](https://github.com/klauspost/compress/pull/482) [#489](https://github.com/klauspost/compress/pull/489) [#490](https://github.com/klauspost/compress/pull/490) [#491](https://github.com/klauspost/compress/pull/491) [#494](https://github.com/klauspost/compress/pull/494) [#478](https://github.com/klauspost/compress/pull/478) + * flate: Faster decompression speed, ~5-10%. [#483](https://github.com/klauspost/compress/pull/483) + * s2: Faster compression with Go v1.18 and amd64 microarch level 3+. [#484](https://github.com/klauspost/compress/pull/484) [#486](https://github.com/klauspost/compress/pull/486) + +* Jan 25, 2022 (v1.14.2) + * zstd: improve header decoder by @dsnet [#476](https://github.com/klauspost/compress/pull/476) + * zstd: Add bigger default blocks [#469](https://github.com/klauspost/compress/pull/469) + * zstd: Remove unused decompression buffer [#470](https://github.com/klauspost/compress/pull/470) + * zstd: Fix logically dead code by @ningmingxiao [#472](https://github.com/klauspost/compress/pull/472) + * flate: Improve level 7-9 [#471](https://github.com/klauspost/compress/pull/471) [#473](https://github.com/klauspost/compress/pull/473) + * zstd: Add noasm tag for xxhash [#475](https://github.com/klauspost/compress/pull/475) + +* Jan 11, 2022 (v1.14.1) + * s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462) + * flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458) + * zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468) + * zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464) + * Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445) +
+ +
+ See changes to v1.13.x + * Aug 30, 2021 (v1.13.5) * gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425) * s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413) @@ -45,7 +237,12 @@ This package provides various compression algorithms. * Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors. * zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382) * zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380) +
+ +
+ See changes to v1.12.x + * May 25, 2021 (v1.12.3) * deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374) * deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375) @@ -67,9 +264,10 @@ This package provides various compression algorithms. * s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352) * zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346) * s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349) +
- See changes prior to v1.12.1 + See changes to v1.11.x * Mar 26, 2021 (v1.11.13) * zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345) @@ -128,7 +326,7 @@ This package provides various compression algorithms.
- See changes prior to v1.11.0 + See changes to v1.10.x * July 8, 2020 (v1.10.11) * zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278) @@ -290,11 +488,6 @@ This package provides various compression algorithms. # deflate usage -* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/). -* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/). -* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/). -* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) - The packages are drop-in replacements for standard libraries. Simply replace the import path to use them: | old import | new import | Documentation @@ -316,6 +509,8 @@ Memory usage is typically 1MB for a Writer. stdlib is in the same range. If you expect to have a lot of concurrently allocated Writers consider using the stateless compress described below. +For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). + # Stateless compression This package offers stateless compression as a special option for gzip/deflate. @@ -432,6 +627,15 @@ For more information see my blog post on [Fast Linear Time Compression](http://b This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip. +# Other packages + +Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code): + +* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression. +* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression. +* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer. +* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression. +* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression. # license diff --git a/vendor/github.com/klauspost/compress/SECURITY.md b/vendor/github.com/klauspost/compress/SECURITY.md new file mode 100644 index 00000000..ca6685e2 --- /dev/null +++ b/vendor/github.com/klauspost/compress/SECURITY.md @@ -0,0 +1,25 @@ +# Security Policy + +## Supported Versions + +Security updates are applied only to the latest release. + +## Vulnerability Definition + +A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability. + +Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently. + +Out-of-memory crashes only applies if the en/decoder uses an abnormal amount of memory, with appropriate options applied, to limit maximum window size, concurrency, etc. However, if you are in doubt you are welcome to file a security issue. + +It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability. + +Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround. + +## Reporting a Vulnerability + +If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. + +Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that. + +This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed in a best effort base. diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index 6f341914..dac97e58 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error { c1.encodeZero(tt[src[ip-2]]) ip -= 2 } + src = src[:ip] // Main compression loop. switch { case !s.zeroBits && s.actualTableLog <= 8: // We can encode 4 symbols without requiring a flush. // We do not need to check if any output is 0 bits. - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encode(tt[v0]) c1.encode(tt[v1]) c2.encode(tt[v2]) c1.encode(tt[v3]) - ip -= 4 } case !s.zeroBits: // We do not need to check if any output is 0 bits. - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encode(tt[v0]) c1.encode(tt[v1]) s.bw.flush32() c2.encode(tt[v2]) c1.encode(tt[v3]) - ip -= 4 } case s.actualTableLog <= 8: // We can encode 4 symbols without requiring a flush - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encodeZero(tt[v0]) c1.encodeZero(tt[v1]) c2.encodeZero(tt[v2]) c1.encodeZero(tt[v3]) - ip -= 4 } default: - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encodeZero(tt[v0]) c1.encodeZero(tt[v1]) s.bw.flush32() c2.encodeZero(tt[v2]) c1.encodeZero(tt[v3]) - ip -= 4 } } @@ -459,15 +456,17 @@ func (s *Scratch) countSimple(in []byte) (max int) { for _, v := range in { s.count[v]++ } - m := uint32(0) + m, symlen := uint32(0), s.symbolLen for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - } + symlen = uint16(i) + 1 } + s.symbolLen = symlen return int(m) } diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index 926f5f15..cc05d0f7 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error { // If the buffer is over-read an error is returned. func (s *Scratch) decompress() error { br := &s.bits - br.init(s.br.unread()) + if err := br.init(s.br.unread()); err != nil { + return err + } var s1, s2 decoder // Initialize and decode first state and symbol. diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go index a4979e88..e36d9742 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -8,115 +8,10 @@ package huff0 import ( "encoding/binary" "errors" + "fmt" "io" ) -// bitReader reads a bitstream in reverse. -// The last set bit indicates the start of the stream and is used -// for aligning the input. -type bitReader struct { - in []byte - off uint // next byte to read is at in[off - 1] - value uint64 - bitsRead uint8 -} - -// init initializes and resets the bit reader. -func (b *bitReader) init(in []byte) error { - if len(in) < 1 { - return errors.New("corrupt stream: too short") - } - b.in = in - b.off = uint(len(in)) - // The highest bit of the last byte indicates where to start - v := in[len(in)-1] - if v == 0 { - return errors.New("corrupt stream, did not find end of stream") - } - b.bitsRead = 64 - b.value = 0 - if len(in) >= 8 { - b.fillFastStart() - } else { - b.fill() - b.fill() - } - b.bitsRead += 8 - uint8(highBit32(uint32(v))) - return nil -} - -// peekBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReader) peekBitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - return v -} - -// fillFast() will make sure at least 32 bits are available. -// There must be at least 4 bytes available. -func (b *bitReader) fillFast() { - if b.bitsRead < 32 { - return - } - - // 2 bounds checks. - v := b.in[b.off-4 : b.off] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) - b.bitsRead -= 32 - b.off -= 4 -} - -func (b *bitReader) advance(n uint8) { - b.bitsRead += n -} - -// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. -func (b *bitReader) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) - b.bitsRead = 0 - b.off -= 8 -} - -// fill() will make sure at least 32 bits are available. -func (b *bitReader) fill() { - if b.bitsRead < 32 { - return - } - if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - b.value = (b.value << 32) | uint64(low) - b.bitsRead -= 32 - b.off -= 4 - return - } - for b.off > 0 { - b.value = (b.value << 8) | uint64(b.in[b.off-1]) - b.bitsRead -= 8 - b.off-- - } -} - -// finished returns true if all bits have been read from the bit stream. -func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 -} - -// close the bitstream and returns an error if out-of-buffer reads occurred. -func (b *bitReader) close() error { - // Release reference. - b.in = nil - if b.bitsRead > 64 { - return io.ErrUnexpectedEOF - } - return nil -} - // bitReader reads a bitstream in reverse. // The last set bit indicates the start of the stream and is used // for aligning the input. @@ -172,7 +67,6 @@ func (b *bitReaderBytes) fillFast() { // 2 bounds checks. v := b.in[b.off-4 : b.off] - v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << (b.bitsRead - 32) b.bitsRead -= 32 @@ -193,8 +87,7 @@ func (b *bitReaderBytes) fill() { return } if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] + v := b.in[b.off-4 : b.off] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << (b.bitsRead - 32) b.bitsRead -= 32 @@ -213,10 +106,17 @@ func (b *bitReaderBytes) finished() bool { return b.off == 0 && b.bitsRead >= 64 } +func (b *bitReaderBytes) remaining() uint { + return b.off*8 + uint(64-b.bitsRead) +} + // close the bitstream and returns an error if out-of-buffer reads occurred. func (b *bitReaderBytes) close() error { // Release reference. b.in = nil + if b.remaining() > 0 { + return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) + } if b.bitsRead > 64 { return io.ErrUnexpectedEOF } @@ -277,7 +177,6 @@ func (b *bitReaderShifted) fillFast() { // 2 bounds checks. v := b.in[b.off-4 : b.off] - v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << ((b.bitsRead - 32) & 63) b.bitsRead -= 32 @@ -298,8 +197,7 @@ func (b *bitReaderShifted) fill() { return } if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] + v := b.in[b.off-4 : b.off] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << ((b.bitsRead - 32) & 63) b.bitsRead -= 32 @@ -313,15 +211,17 @@ func (b *bitReaderShifted) fill() { } } -// finished returns true if all bits have been read from the bit stream. -func (b *bitReaderShifted) finished() bool { - return b.off == 0 && b.bitsRead >= 64 +func (b *bitReaderShifted) remaining() uint { + return b.off*8 + uint(64-b.bitsRead) } // close the bitstream and returns an error if out-of-buffer reads occurred. func (b *bitReaderShifted) close() error { // Release reference. b.in = nil + if b.remaining() > 0 { + return fmt.Errorf("corrupt input: %d bits remain on stream", b.remaining()) + } if b.bitsRead > 64 { return io.ErrUnexpectedEOF } diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index 6bce4e87..b4d7164e 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -5,8 +5,6 @@ package huff0 -import "fmt" - // bitWriter will write bits. // First bit will be LSB of the first byte of output. type bitWriter struct { @@ -15,22 +13,6 @@ type bitWriter struct { out []byte } -// bitMask16 is bitmasks. Has extra to avoid bounds check. -var bitMask16 = [32]uint16{ - 0, 1, 3, 7, 0xF, 0x1F, - 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, - 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, - 0xFFFF, 0xFFFF} /* up to 16 bits */ - -// addBits16NC will add up to 16 bits. -// It will not check if there is space for them, -// so the caller must ensure that it has flushed recently. -func (b *bitWriter) addBits16NC(value uint16, bits uint8) { - b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) - b.nBits += bits -} - // addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. // It will not check if there is space for them, so the caller must ensure that it has flushed recently. func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { @@ -70,102 +52,20 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { b.nBits += encA.nBits + encB.nBits } -// addBits16ZeroNC will add up to 16 bits. +// encFourSymbols adds up to 32 bits from four symbols. // It will not check if there is space for them, -// so the caller must ensure that it has flushed recently. -// This is fastest if bits can be zero. -func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) { - if bits == 0 { - return - } - value <<= (16 - bits) & 15 - value >>= (16 - bits) & 15 - b.bitContainer |= uint64(value) << (b.nBits & 63) - b.nBits += bits -} - -// flush will flush all pending full bytes. -// There will be at least 56 bits available for writing when this has been called. -// Using flush32 is faster, but leaves less space for writing. -func (b *bitWriter) flush() { - v := b.nBits >> 3 - switch v { - case 0: - return - case 1: - b.out = append(b.out, - byte(b.bitContainer), - ) - b.bitContainer >>= 1 << 3 - case 2: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - ) - b.bitContainer >>= 2 << 3 - case 3: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - ) - b.bitContainer >>= 3 << 3 - case 4: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - ) - b.bitContainer >>= 4 << 3 - case 5: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - ) - b.bitContainer >>= 5 << 3 - case 6: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - ) - b.bitContainer >>= 6 << 3 - case 7: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - ) - b.bitContainer >>= 7 << 3 - case 8: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - byte(b.bitContainer>>56), - ) - b.bitContainer = 0 - b.nBits = 0 - return - default: - panic(fmt.Errorf("bits (%d) > 64", b.nBits)) - } - b.nBits &= 7 +// so the caller must ensure that b has been flushed recently. +func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) { + bitsA := encA.nBits + bitsB := bitsA + encB.nBits + bitsC := bitsB + encC.nBits + bitsD := bitsC + encD.nBits + combined := uint64(encA.val) | + (uint64(encB.val) << (bitsA & 63)) | + (uint64(encC.val) << (bitsB & 63)) | + (uint64(encD.val) << (bitsC & 63)) + b.bitContainer |= combined << (b.nBits & 63) + b.nBits += bitsD } // flush32 will flush out, so there are at least 32 bits available for writing. @@ -201,10 +101,3 @@ func (b *bitWriter) close() error { b.flushAlign() return nil } - -// reset and continue writing by appending to out. -func (b *bitWriter) reset(out []byte) { - b.bitContainer = 0 - b.nBits = 0 - b.out = out -} diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go index 50bcdf6e..4dcab8d2 100644 --- a/vendor/github.com/klauspost/compress/huff0/bytereader.go +++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go @@ -20,11 +20,6 @@ func (b *byteReader) init(in []byte) { b.off = 0 } -// advance the stream b n bytes. -func (b *byteReader) advance(n uint) { - b.off += int(n) -} - // Int32 returns a little endian int32 starting at current offset. func (b byteReader) Int32() int32 { v3 := int32(b.b[b.off+3]) @@ -43,11 +38,6 @@ func (b byteReader) Uint32() uint32 { return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 } -// unread returns the unread portion of the input. -func (b byteReader) unread() []byte { - return b.b[b.off:] -} - // remain will return the number of bytes remaining. func (b byteReader) remain() int { return len(b.b) - b.off diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 8323dc05..4ee4fa18 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -2,6 +2,7 @@ package huff0 import ( "fmt" + "math" "runtime" "sync" ) @@ -247,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { tmp := src[n : n+4] // tmp should be len 4 bw.flush32() - bw.encTwoSymbols(cTable, tmp[3], tmp[2]) - bw.encTwoSymbols(cTable, tmp[1], tmp[0]) + bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]]) } } else { for ; n >= 0; n -= 4 { @@ -289,6 +289,10 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) { if err != nil { return nil, err } + if len(s.Out)-idx > math.MaxUint16 { + // We cannot store the size in the jump table + return nil, ErrIncompressible + } // Write compressed length as little endian before block. if i < 3 { // Last length is not written. @@ -332,6 +336,10 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { return nil, errs[i] } o := s.tmpOut[i] + if len(o) > math.MaxUint16 { + // We cannot store the size in the jump table + return nil, ErrIncompressible + } // Write compressed length as little endian before block. if i < 3 { // Last length is not written. @@ -356,29 +364,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { m := uint32(0) if len(s.prevTable) > 0 { for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - if i >= len(s.prevTable) { - reuse = false - } else { - if s.prevTable[i].nBits == 0 { - reuse = false - } - } + s.symbolLen = uint16(i) + 1 + if i >= len(s.prevTable) { + reuse = false + } else if s.prevTable[i].nBits == 0 { + reuse = false } } return int(m), reuse } for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - } + s.symbolLen = uint16(i) + 1 } return int(m), false } @@ -395,6 +403,7 @@ func (s *Scratch) canUseTable(c cTable) bool { return true } +//lint:ignore U1000 used for debugging func (s *Scratch) validateTable(c cTable) bool { if len(c) < int(s.symbolLen) { return false @@ -474,34 +483,35 @@ func (s *Scratch) buildCTable() error { // Different from reference implementation. huffNode0 := s.nodes[0 : huffNodesLen+1] - for huffNode[nonNullRank].count == 0 { + for huffNode[nonNullRank].count() == 0 { nonNullRank-- } lowS := int16(nonNullRank) nodeRoot := nodeNb + lowS - 1 lowN := nodeNb - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count - huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb) + huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count()) + huffNode[lowS].setParent(nodeNb) + huffNode[lowS-1].setParent(nodeNb) nodeNb++ lowS -= 2 for n := nodeNb; n <= nodeRoot; n++ { - huffNode[n].count = 1 << 30 + huffNode[n].setCount(1 << 30) } // fake entry, strong barrier - huffNode0[0].count = 1 << 31 + huffNode0[0].setCount(1 << 31) // create parents for nodeNb <= nodeRoot { var n1, n2 int16 - if huffNode0[lowS+1].count < huffNode0[lowN+1].count { + if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { n1 = lowS lowS-- } else { n1 = lowN lowN++ } - if huffNode0[lowS+1].count < huffNode0[lowN+1].count { + if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { n2 = lowS lowS-- } else { @@ -509,18 +519,19 @@ func (s *Scratch) buildCTable() error { lowN++ } - huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count - huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb) + huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count()) + huffNode0[n1+1].setParent(nodeNb) + huffNode0[n2+1].setParent(nodeNb) nodeNb++ } // distribute weights (unlimited tree height) - huffNode[nodeRoot].nbBits = 0 + huffNode[nodeRoot].setNbBits(0) for n := nodeRoot - 1; n >= startNode; n-- { - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 + huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) } for n := uint16(0); n <= nonNullRank; n++ { - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 + huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) } s.actualTableLog = s.setMaxHeight(int(nonNullRank)) maxNbBits := s.actualTableLog @@ -532,7 +543,7 @@ func (s *Scratch) buildCTable() error { var nbPerRank [tableLogMax + 1]uint16 var valPerRank [16]uint16 for _, v := range huffNode[:nonNullRank+1] { - nbPerRank[v.nbBits]++ + nbPerRank[v.nbBits()]++ } // determine stating value per rank { @@ -547,7 +558,7 @@ func (s *Scratch) buildCTable() error { // push nbBits per symbol, symbol order for _, v := range huffNode[:nonNullRank+1] { - s.cTable[v.symbol].nBits = v.nbBits + s.cTable[v.symbol()].nBits = v.nbBits() } // assign value within rank, symbol order @@ -593,12 +604,12 @@ func (s *Scratch) huffSort() { pos := rank[r].current rank[r].current++ prev := nodes[(pos-1)&huffNodesMask] - for pos > rank[r].base && c > prev.count { + for pos > rank[r].base && c > prev.count() { nodes[pos&huffNodesMask] = prev pos-- prev = nodes[(pos-1)&huffNodesMask] } - nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} + nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n)) } } @@ -607,7 +618,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { huffNode := s.nodes[1 : huffNodesLen+1] //huffNode = huffNode[: huffNodesLen] - largestBits := huffNode[lastNonNull].nbBits + largestBits := huffNode[lastNonNull].nbBits() // early exit : no elt > maxNbBits if largestBits <= maxNbBits { @@ -617,14 +628,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { baseCost := int(1) << (largestBits - maxNbBits) n := uint32(lastNonNull) - for huffNode[n].nbBits > maxNbBits { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)) - huffNode[n].nbBits = maxNbBits + for huffNode[n].nbBits() > maxNbBits { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits())) + huffNode[n].setNbBits(maxNbBits) n-- } // n stops at huffNode[n].nbBits <= maxNbBits - for huffNode[n].nbBits == maxNbBits { + for huffNode[n].nbBits() == maxNbBits { n-- } // n end at index of smallest symbol using < maxNbBits @@ -645,10 +656,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { { currentNbBits := maxNbBits for pos := int(n); pos >= 0; pos-- { - if huffNode[pos].nbBits >= currentNbBits { + if huffNode[pos].nbBits() >= currentNbBits { continue } - currentNbBits = huffNode[pos].nbBits // < maxNbBits + currentNbBits = huffNode[pos].nbBits() // < maxNbBits rankLast[maxNbBits-currentNbBits] = uint32(pos) } } @@ -665,8 +676,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { if lowPos == noSymbol { break } - highTotal := huffNode[highPos].count - lowTotal := 2 * huffNode[lowPos].count + highTotal := huffNode[highPos].count() + lowTotal := 2 * huffNode[lowPos].count() if highTotal <= lowTotal { break } @@ -682,13 +693,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { // this rank is no longer empty rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] } - huffNode[rankLast[nBitsToDecrease]].nbBits++ + huffNode[rankLast[nBitsToDecrease]].setNbBits(1 + + huffNode[rankLast[nBitsToDecrease]].nbBits()) if rankLast[nBitsToDecrease] == 0 { /* special case, reached largest symbol */ rankLast[nBitsToDecrease] = noSymbol } else { rankLast[nBitsToDecrease]-- - if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease { + if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease { rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */ } } @@ -696,15 +708,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { for totalCost < 0 { /* Sometimes, cost correction overshoot */ if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ - for huffNode[n].nbBits == maxNbBits { + for huffNode[n].nbBits() == maxNbBits { n-- } - huffNode[n+1].nbBits-- + huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1) rankLast[1] = n + 1 totalCost++ continue } - huffNode[rankLast[1]+1].nbBits-- + huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1) rankLast[1]++ totalCost++ } @@ -712,9 +724,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { return maxNbBits } -type nodeElt struct { - count uint32 - parent uint16 - symbol byte - nbBits uint8 +// A nodeElt is the fields +// +// count uint32 +// parent uint16 +// symbol byte +// nbBits uint8 +// +// in some order, all squashed into an integer so that the compiler +// always loads and stores entire nodeElts instead of separate fields. +type nodeElt uint64 + +func makeNodeElt(count uint32, symbol byte) nodeElt { + return nodeElt(count) | nodeElt(symbol)<<48 } + +func (e *nodeElt) count() uint32 { return uint32(*e) } +func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) } +func (e *nodeElt) symbol() byte { return byte(*e >> 48) } +func (e *nodeElt) nbBits() uint8 { return uint8(*e >> 56) } + +func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) } +func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 } +func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 9b7cc8e9..54bd08b2 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -4,13 +4,13 @@ import ( "errors" "fmt" "io" + "sync" "github.com/klauspost/compress/fse" ) type dTable struct { single []dEntrySingle - double []dEntryDouble } // single-symbols decoding @@ -18,13 +18,6 @@ type dEntrySingle struct { entry uint16 } -// double-symbols decoding -type dEntryDouble struct { - seq uint16 - nBits uint8 - len uint8 -} - // Uses special code for all tables that are < 8 bits. const use8BitTables = true @@ -34,7 +27,7 @@ const use8BitTables = true // If no Scratch is provided a new one is allocated. // The returned Scratch can be used for encoding or decoding input using this table. func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { - s, err = s.prepare(in) + s, err = s.prepare(nil) if err != nil { return s, nil, err } @@ -68,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { b, err := fse.Decompress(in[:iSize], s.fse) s.fse.Out = nil if err != nil { - return s, nil, err + return s, nil, fmt.Errorf("fse decompress returned: %w", err) } if len(b) > 255 { return s, nil, errors.New("corrupt input: output table too large") @@ -216,6 +209,7 @@ func (s *Scratch) Decoder() *Decoder { return &Decoder{ dt: s.dt, actualTableLog: s.actualTableLog, + bufs: &s.decPool, } } @@ -223,103 +217,15 @@ func (s *Scratch) Decoder() *Decoder { type Decoder struct { dt dTable actualTableLog uint8 + bufs *sync.Pool } -// Decompress1X will decompress a 1X encoded stream. -// The cap of the output buffer will be the maximum decompressed size. -// The length of the supplied input must match the end of a block exactly. -func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if use8BitTables && d.actualTableLog <= 8 { - return d.decompress1X8Bit(dst, src) - } - var br bitReaderShifted - err := br.init(src) - if err != nil { - return dst, err - } - maxDecodedSize := cap(dst) - dst = dst[:0] - - // Avoid bounds check by always having full sized table. - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - dt := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - var buf [256]byte - var off uint8 - - for br.off >= 8 { - br.fillFast() - v := dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+0] = uint8(v.entry >> 8) - - v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+1] = uint8(v.entry >> 8) - - // Refill - br.fillFast() - - v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+2] = uint8(v.entry >> 8) - - v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] - br.advance(uint8(v.entry)) - buf[off+3] = uint8(v.entry >> 8) - - off += 4 - if off == 0 { - if len(dst)+256 > maxDecodedSize { - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:]...) - } - } - - if len(dst)+int(off) > maxDecodedSize { - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - dst = append(dst, buf[:off]...) - - // br < 8, so uint8 is fine - bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead - for bitsLeft > 0 { - br.fill() - if false && br.bitsRead >= 32 { - if br.off >= 4 { - v := br.in[br.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - br.value = (br.value << 32) | uint64(low) - br.bitsRead -= 32 - br.off -= 4 - } else { - for br.off > 0 { - br.value = (br.value << 8) | uint64(br.in[br.off-1]) - br.bitsRead -= 8 - br.off-- - } - } - } - if len(dst) >= maxDecodedSize { - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] - nBits := uint8(v.entry) - br.advance(nBits) - bitsLeft -= nBits - dst = append(dst, uint8(v.entry>>8)) +func (d *Decoder) buffer() *[4][256]byte { + buf, ok := d.bufs.Get().(*[4][256]byte) + if ok { + return buf } - return dst, br.close() + return &[4][256]byte{} } // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. @@ -341,12 +247,13 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { dt := d.dt.single[:256] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + bufs := d.buffer() + buf := &bufs[0] var off uint8 switch d.actualTableLog { case 8: - const shift = 8 - 8 + const shift = 0 for br.off >= 4 { br.fillFast() v := dt[uint8(br.value>>(56+shift))] @@ -369,6 +276,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { if off == 0 { if len(dst)+256 > maxDecodedSize { br.close() + d.bufs.Put(bufs) return nil, ErrMaxDecodedSizeExceeded } dst = append(dst, buf[:]...) @@ -398,6 +306,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { if off == 0 { if len(dst)+256 > maxDecodedSize { br.close() + d.bufs.Put(bufs) return nil, ErrMaxDecodedSizeExceeded } dst = append(dst, buf[:]...) @@ -426,6 +335,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -455,6 +365,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -484,6 +395,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -513,6 +425,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -542,6 +455,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -571,6 +485,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -578,10 +493,12 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { } } default: + d.bufs.Put(bufs) return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog) } if len(dst)+int(off) > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -601,6 +518,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { } if len(dst) >= maxDecodedSize { br.close() + d.bufs.Put(bufs) return nil, ErrMaxDecodedSizeExceeded } v := dt[br.peekByteFast()>>shift] @@ -609,6 +527,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { bitsLeft -= int8(nBits) dst = append(dst, uint8(v.entry>>8)) } + d.bufs.Put(bufs) return dst, br.close() } @@ -628,7 +547,8 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { dt := d.dt.single[:256] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + bufs := d.buffer() + buf := &bufs[0] var off uint8 const shift = 56 @@ -655,6 +575,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -663,6 +584,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { } if len(dst)+int(off) > maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -679,6 +601,7 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { } } if len(dst) >= maxDecodedSize { + d.bufs.Put(bufs) br.close() return nil, ErrMaxDecodedSizeExceeded } @@ -688,199 +611,10 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { bitsLeft -= int8(nBits) dst = append(dst, uint8(v.entry>>8)) } + d.bufs.Put(bufs) return dst, br.close() } -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if len(src) < 6+(4*1) { - return nil, errors.New("input too small") - } - if use8BitTables && d.actualTableLog <= 8 { - return d.decompress4X8bit(dst, src) - } - - var br [4]bitReaderShifted - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - single := d.dt.single[:tlSize] - - // Use temp table to avoid bound checks/append penalty. - var buf [256]byte - var off uint8 - var decoded int - - // Decode 2 values from each decoder/loop. - const bufoff = 256 / 4 - for { - if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { - break - } - - { - const stream = 0 - const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - val := br[stream].peekBitsFast(d.actualTableLog) - v := single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream] = uint8(v.entry >> 8) - - val2 := br[stream2].peekBitsFast(d.actualTableLog) - v2 := single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2] = uint8(v2.entry >> 8) - - val = br[stream].peekBitsFast(d.actualTableLog) - v = single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream+1] = uint8(v.entry >> 8) - - val2 = br[stream2].peekBitsFast(d.actualTableLog) - v2 = single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) - } - - { - const stream = 2 - const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - val := br[stream].peekBitsFast(d.actualTableLog) - v := single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream] = uint8(v.entry >> 8) - - val2 := br[stream2].peekBitsFast(d.actualTableLog) - v2 := single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2] = uint8(v2.entry >> 8) - - val = br[stream].peekBitsFast(d.actualTableLog) - v = single[val&tlMask] - br[stream].advance(uint8(v.entry)) - buf[off+bufoff*stream+1] = uint8(v.entry >> 8) - - val2 = br[stream2].peekBitsFast(d.actualTableLog) - v2 = single[val2&tlMask] - br[stream2].advance(uint8(v2.entry)) - buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) - } - - off += 2 - - if off == bufoff { - if bufoff > dstEvery { - return nil, errors.New("corruption detected: stream overrun 1") - } - copy(out, buf[:bufoff]) - copy(out[dstEvery:], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) - off = 0 - out = out[bufoff:] - decoded += 256 - // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { - return nil, errors.New("corruption detected: stream overrun 2") - } - } - } - if off > 0 { - ioff := int(off) - if len(out) < dstEvery*3+ioff { - return nil, errors.New("corruption detected: stream overrun 3") - } - copy(out, buf[:off]) - copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) - decoded += int(off) * 4 - out = out[off:] - } - - // Decode remaining. - for i := range br { - offset := dstEvery * i - br := &br[i] - bitsLeft := br.off*8 + uint(64-br.bitsRead) - for bitsLeft > 0 { - br.fill() - if false && br.bitsRead >= 32 { - if br.off >= 4 { - v := br.in[br.off-4:] - v = v[:4] - low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - br.value = (br.value << 32) | uint64(low) - br.bitsRead -= 32 - br.off -= 4 - } else { - for br.off > 0 { - br.value = (br.value << 8) | uint64(br.in[br.off-1]) - br.bitsRead -= 8 - br.off-- - } - } - } - // end inline... - if offset >= len(out) { - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - val := br.peekBitsFast(d.actualTableLog) - v := single[val&tlMask].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - return nil, err - } - } - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - // Decompress4X will decompress a 4X encoded stream. // The length of the supplied input must match the end of a block exactly. // The *capacity* of the dst slice must match the destination size of @@ -914,18 +648,18 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - shift := (8 - d.actualTableLog) & 7 + shift := (56 + (8 - d.actualTableLog)) & 63 const tlSize = 1 << 8 single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + buf := d.buffer() var off uint8 var decoded int // Decode 4 values from each decoder/loop. - const bufoff = 256 / 4 + const bufoff = 256 for { if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { break @@ -935,120 +669,144 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { // Interleave 2 decodes. const stream = 0 const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } { const stream = 2 const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } off += 4 - if off == bufoff { + if off == 0 { if bufoff > dstEvery { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[:bufoff]) - copy(out[dstEvery:], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) - off = 0 - out = out[bufoff:] - decoded += 256 // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { + if len(out)-bufoff < dstEvery*3 { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 2") } + //copy(out, buf[0][:]) + //copy(out[dstEvery:], buf[1][:]) + //copy(out[dstEvery*2:], buf[2][:]) + *(*[bufoff]byte)(out) = buf[0] + *(*[bufoff]byte)(out[dstEvery:]) = buf[1] + *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] + *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] + out = out[bufoff:] + decoded += bufoff * 4 } } if off > 0 { ioff := int(off) if len(out) < dstEvery*3+ioff { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 3") } - copy(out, buf[:off]) - copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + copy(out, buf[0][:off]) + copy(out[dstEvery:], buf[1][:off]) + copy(out[dstEvery*2:], buf[2][:off]) + copy(out[dstEvery*3:], buf[3][:off]) decoded += int(off) * 4 out = out[off:] } // Decode remaining. + // Decode remaining. + remainBytes := dstEvery - (decoded / 4) for i := range br { offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } br := &br[i] - bitsLeft := int(br.off*8) + int(64-br.bitsRead) + bitsLeft := br.remaining() for bitsLeft > 0 { if br.finished() { + d.bufs.Put(buf) return nil, io.ErrUnexpectedEOF } if br.bitsRead >= 56 { @@ -1068,24 +826,31 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { } } // end inline... - if offset >= len(out) { + if offset >= endsAt { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 4") } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[uint8(br.value>>shift)].entry nBits := uint8(v) br.advance(nBits) - bitsLeft -= int(nBits) + bitsLeft -= uint(nBits) out[offset] = uint8(v >> 8) offset++ } + if offset != endsAt { + d.bufs.Put(buf) + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } decoded += offset - dstEvery*i err = br.close() if err != nil { + d.bufs.Put(buf) return nil, err } } + d.bufs.Put(buf) if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } @@ -1121,18 +886,17 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - const shift = 0 + const shift = 56 const tlSize = 1 << 8 - const tlMask = tlSize - 1 single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. - var buf [256]byte + buf := d.buffer() var off uint8 var decoded int // Decode 4 values from each decoder/loop. - const bufoff = 256 / 4 + const bufoff = 256 for { if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { break @@ -1142,98 +906,116 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { // Interleave 2 decodes. const stream = 0 const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } { const stream = 2 const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off] = uint8(v >> 8) + buf[stream2][off] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+1] = uint8(v >> 8) + buf[stream2][off+1] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+2] = uint8(v >> 8) + buf[stream2][off+2] = uint8(v2 >> 8) + + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 + buf[stream][off+3] = uint8(v >> 8) + buf[stream2][off+3] = uint8(v2 >> 8) } off += 4 - if off == bufoff { + if off == 0 { if bufoff > dstEvery { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 1") } - copy(out, buf[:bufoff]) - copy(out[dstEvery:], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) - off = 0 - out = out[bufoff:] - decoded += 256 // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { + if len(out)-bufoff < dstEvery*3 { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 2") } + + //copy(out, buf[0][:]) + //copy(out[dstEvery:], buf[1][:]) + //copy(out[dstEvery*2:], buf[2][:]) + // copy(out[dstEvery*3:], buf[3][:]) + *(*[bufoff]byte)(out) = buf[0] + *(*[bufoff]byte)(out[dstEvery:]) = buf[1] + *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] + *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] + out = out[bufoff:] + decoded += bufoff * 4 } } if off > 0 { @@ -1241,21 +1023,27 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { if len(out) < dstEvery*3+ioff { return nil, errors.New("corruption detected: stream overrun 3") } - copy(out, buf[:off]) - copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) - copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) - copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + copy(out, buf[0][:off]) + copy(out[dstEvery:], buf[1][:off]) + copy(out[dstEvery*2:], buf[2][:off]) + copy(out[dstEvery*3:], buf[3][:off]) decoded += int(off) * 4 out = out[off:] } // Decode remaining. + remainBytes := dstEvery - (decoded / 4) for i := range br { offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } br := &br[i] - bitsLeft := int(br.off*8) + int(64-br.bitsRead) + bitsLeft := br.remaining() for bitsLeft > 0 { if br.finished() { + d.bufs.Put(buf) return nil, io.ErrUnexpectedEOF } if br.bitsRead >= 56 { @@ -1275,24 +1063,32 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { } } // end inline... - if offset >= len(out) { + if offset >= endsAt { + d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 4") } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[br.peekByteFast()].entry nBits := uint8(v) br.advance(nBits) - bitsLeft -= int(nBits) + bitsLeft -= uint(nBits) out[offset] = uint8(v >> 8) offset++ } + if offset != endsAt { + d.bufs.Put(buf) + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } + decoded += offset - dstEvery*i err = br.close() if err != nil { + d.bufs.Put(buf) return nil, err } } + d.bufs.Put(buf) if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go new file mode 100644 index 00000000..ba7e8e6b --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go @@ -0,0 +1,226 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// This file contains the specialisation of Decoder.Decompress4X +// and Decoder.Decompress1X that use an asm implementation of thir main loops. +package huff0 + +import ( + "errors" + "fmt" + + "github.com/klauspost/compress/internal/cpuinfo" +) + +// decompress4x_main_loop_x86 is an x86 assembler implementation +// of Decompress4X when tablelog > 8. +// +//go:noescape +func decompress4x_main_loop_amd64(ctx *decompress4xContext) + +// decompress4x_8b_loop_x86 is an x86 assembler implementation +// of Decompress4X when tablelog <= 8 which decodes 4 entries +// per loop. +// +//go:noescape +func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) + +// fallback8BitSize is the size where using Go version is faster. +const fallback8BitSize = 800 + +type decompress4xContext struct { + pbr *[4]bitReaderShifted + peekBits uint8 + out *byte + dstEvery int + tbl *dEntrySingle + decoded int + limit *byte +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if len(src) < 6+(4*1) { + return nil, errors.New("input too small") + } + + use8BitTables := d.actualTableLog <= 8 + if cap(dst) < fallback8BitSize && use8BitTables { + return d.decompress4X8bit(dst, src) + } + + var br [4]bitReaderShifted + // Decode "jump table" + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + var decoded int + + if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { + ctx := decompress4xContext{ + pbr: &br, + peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() + out: &out[0], + dstEvery: dstEvery, + tbl: &single[0], + limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last. + } + if use8BitTables { + decompress4x_8b_main_loop_amd64(&ctx) + } else { + decompress4x_main_loop_amd64(&ctx) + } + + decoded = ctx.decoded + out = out[decoded/4:] + } + + // Decode remaining. + remainBytes := dstEvery - (decoded / 4) + for i := range br { + offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } + br := &br[i] + bitsLeft := br.remaining() + for bitsLeft > 0 { + br.fill() + if offset >= endsAt { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + if offset != endsAt { + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// decompress4x_main_loop_x86 is an x86 assembler implementation +// of Decompress1X when tablelog > 8. +// +//go:noescape +func decompress1x_main_loop_amd64(ctx *decompress1xContext) + +// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation +// of Decompress1X when tablelog > 8. +// +//go:noescape +func decompress1x_main_loop_bmi2(ctx *decompress1xContext) + +type decompress1xContext struct { + pbr *bitReaderShifted + peekBits uint8 + out *byte + outCap int + tbl *dEntrySingle + decoded int +} + +// Error reported by asm implementations +const error_max_decoded_size_exeeded = -1 + +// Decompress1X will decompress a 1X encoded stream. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + var br bitReaderShifted + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:maxDecodedSize] + + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + + if maxDecodedSize >= 4 { + ctx := decompress1xContext{ + pbr: &br, + out: &dst[0], + outCap: maxDecodedSize, + peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() + tbl: &d.dt.single[0], + } + + if cpuinfo.HasBMI2() { + decompress1x_main_loop_bmi2(&ctx) + } else { + decompress1x_main_loop_amd64(&ctx) + } + if ctx.decoded == error_max_decoded_size_exeeded { + return nil, ErrMaxDecodedSizeExceeded + } + + dst = dst[:ctx.decoded] + } + + // br < 8, so uint8 is fine + bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead + for bitsLeft > 0 { + br.fill() + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= nBits + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s new file mode 100644 index 00000000..c4c7ab2d --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s @@ -0,0 +1,830 @@ +// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT. + +//go:build amd64 && !appengine && !noasm && gc + +// func decompress4x_main_loop_amd64(ctx *decompress4xContext) +TEXT ·decompress4x_main_loop_amd64(SB), $0-8 + // Preload values + MOVQ ctx+0(FP), AX + MOVBQZX 8(AX), DI + MOVQ 16(AX), BX + MOVQ 48(AX), SI + MOVQ 24(AX), R8 + MOVQ 32(AX), R9 + MOVQ (AX), R10 + + // Main loop +main_loop: + XORL DX, DX + CMPQ BX, SI + SETGE DL + + // br0.fillFast32() + MOVQ 32(R10), R11 + MOVBQZX 40(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill0 + MOVQ 24(R10), AX + SUBQ $0x20, R12 + SUBQ $0x04, AX + MOVQ (R10), R13 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 24(R10) + ORQ R13, R11 + + // exhausted += (br0.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL + +skip_fill0: + // val0 := br0.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br0.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br0.peekTopBits(peekBits) + MOVQ DI, CX + MOVQ R11, R13 + SHRQ CL, R13 + + // v1 := table[val1&mask] + MOVW (R9)(R13*2), CX + + // br0.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (BX) + + // update the bitreader structure + MOVQ R11, 32(R10) + MOVB R12, 40(R10) + + // br1.fillFast32() + MOVQ 80(R10), R11 + MOVBQZX 88(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill1 + MOVQ 72(R10), AX + SUBQ $0x20, R12 + SUBQ $0x04, AX + MOVQ 48(R10), R13 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 72(R10) + ORQ R13, R11 + + // exhausted += (br1.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL + +skip_fill1: + // val0 := br1.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br1.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br1.peekTopBits(peekBits) + MOVQ DI, CX + MOVQ R11, R13 + SHRQ CL, R13 + + // v1 := table[val1&mask] + MOVW (R9)(R13*2), CX + + // br1.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (BX)(R8*1) + + // update the bitreader structure + MOVQ R11, 80(R10) + MOVB R12, 88(R10) + + // br2.fillFast32() + MOVQ 128(R10), R11 + MOVBQZX 136(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill2 + MOVQ 120(R10), AX + SUBQ $0x20, R12 + SUBQ $0x04, AX + MOVQ 96(R10), R13 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 120(R10) + ORQ R13, R11 + + // exhausted += (br2.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL + +skip_fill2: + // val0 := br2.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br2.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br2.peekTopBits(peekBits) + MOVQ DI, CX + MOVQ R11, R13 + SHRQ CL, R13 + + // v1 := table[val1&mask] + MOVW (R9)(R13*2), CX + + // br2.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (BX)(R8*2) + + // update the bitreader structure + MOVQ R11, 128(R10) + MOVB R12, 136(R10) + + // br3.fillFast32() + MOVQ 176(R10), R11 + MOVBQZX 184(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill3 + MOVQ 168(R10), AX + SUBQ $0x20, R12 + SUBQ $0x04, AX + MOVQ 144(R10), R13 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 168(R10) + ORQ R13, R11 + + // exhausted += (br3.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL + +skip_fill3: + // val0 := br3.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br3.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br3.peekTopBits(peekBits) + MOVQ DI, CX + MOVQ R11, R13 + SHRQ CL, R13 + + // v1 := table[val1&mask] + MOVW (R9)(R13*2), CX + + // br3.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + LEAQ (R8)(R8*2), CX + MOVW AX, (BX)(CX*1) + + // update the bitreader structure + MOVQ R11, 176(R10) + MOVB R12, 184(R10) + ADDQ $0x02, BX + TESTB DL, DL + JZ main_loop + MOVQ ctx+0(FP), AX + SUBQ 16(AX), BX + SHLQ $0x02, BX + MOVQ BX, 40(AX) + RET + +// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) +TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8 + // Preload values + MOVQ ctx+0(FP), CX + MOVBQZX 8(CX), DI + MOVQ 16(CX), BX + MOVQ 48(CX), SI + MOVQ 24(CX), R8 + MOVQ 32(CX), R9 + MOVQ (CX), R10 + + // Main loop +main_loop: + XORL DX, DX + CMPQ BX, SI + SETGE DL + + // br0.fillFast32() + MOVQ 32(R10), R11 + MOVBQZX 40(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill0 + MOVQ 24(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ (R10), R14 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 24(R10) + ORQ R14, R11 + + // exhausted += (br0.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL + +skip_fill0: + // val0 := br0.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br0.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br0.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v1 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br0.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // val2 := br0.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v2 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br0.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // val3 := br0.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v3 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br0.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (BX) + + // update the bitreader structure + MOVQ R11, 32(R10) + MOVB R12, 40(R10) + + // br1.fillFast32() + MOVQ 80(R10), R11 + MOVBQZX 88(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill1 + MOVQ 72(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ 48(R10), R14 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 72(R10) + ORQ R14, R11 + + // exhausted += (br1.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL + +skip_fill1: + // val0 := br1.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br1.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br1.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v1 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br1.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // val2 := br1.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v2 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br1.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // val3 := br1.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v3 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br1.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (BX)(R8*1) + + // update the bitreader structure + MOVQ R11, 80(R10) + MOVB R12, 88(R10) + + // br2.fillFast32() + MOVQ 128(R10), R11 + MOVBQZX 136(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill2 + MOVQ 120(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ 96(R10), R14 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 120(R10) + ORQ R14, R11 + + // exhausted += (br2.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL + +skip_fill2: + // val0 := br2.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br2.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br2.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v1 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br2.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // val2 := br2.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v2 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br2.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // val3 := br2.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v3 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br2.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (BX)(R8*2) + + // update the bitreader structure + MOVQ R11, 128(R10) + MOVB R12, 136(R10) + + // br3.fillFast32() + MOVQ 176(R10), R11 + MOVBQZX 184(R10), R12 + CMPQ R12, $0x20 + JBE skip_fill3 + MOVQ 168(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ 144(R10), R14 + + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 168(R10) + ORQ R14, R11 + + // exhausted += (br3.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL + +skip_fill3: + // val0 := br3.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v0 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br3.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + + // val1 := br3.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v1 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br3.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // val2 := br3.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v2 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br3.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R11 + ADDB CL, R12 + + // val3 := br3.peekTopBits(peekBits) + MOVQ R11, R13 + MOVQ DI, CX + SHRQ CL, R13 + + // v3 := table[val0&mask] + MOVW (R9)(R13*2), CX + + // br3.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R11 + ADDB CL, R12 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + LEAQ (R8)(R8*2), CX + MOVL AX, (BX)(CX*1) + + // update the bitreader structure + MOVQ R11, 176(R10) + MOVB R12, 184(R10) + ADDQ $0x04, BX + TESTB DL, DL + JZ main_loop + MOVQ ctx+0(FP), AX + SUBQ 16(AX), BX + SHLQ $0x02, BX + MOVQ BX, 40(AX) + RET + +// func decompress1x_main_loop_amd64(ctx *decompress1xContext) +TEXT ·decompress1x_main_loop_amd64(SB), $0-8 + MOVQ ctx+0(FP), CX + MOVQ 16(CX), DX + MOVQ 24(CX), BX + CMPQ BX, $0x04 + JB error_max_decoded_size_exceeded + LEAQ (DX)(BX*1), BX + MOVQ (CX), SI + MOVQ (SI), R8 + MOVQ 24(SI), R9 + MOVQ 32(SI), R10 + MOVBQZX 40(SI), R11 + MOVQ 32(CX), SI + MOVBQZX 8(CX), DI + JMP loop_condition + +main_loop: + // Check if we have room for 4 bytes in the output buffer + LEAQ 4(DX), CX + CMPQ CX, BX + JGE error_max_decoded_size_exceeded + + // Decode 4 values + CMPQ R11, $0x20 + JL bitReader_fillFast_1_end + SUBQ $0x20, R11 + SUBQ $0x04, R9 + MOVL (R8)(R9*1), R12 + MOVQ R11, CX + SHLQ CL, R12 + ORQ R12, R10 + +bitReader_fillFast_1_end: + MOVQ DI, CX + MOVQ R10, R12 + SHRQ CL, R12 + MOVW (SI)(R12*2), CX + MOVB CH, AL + MOVBQZX CL, CX + ADDQ CX, R11 + SHLQ CL, R10 + MOVQ DI, CX + MOVQ R10, R12 + SHRQ CL, R12 + MOVW (SI)(R12*2), CX + MOVB CH, AH + MOVBQZX CL, CX + ADDQ CX, R11 + SHLQ CL, R10 + BSWAPL AX + CMPQ R11, $0x20 + JL bitReader_fillFast_2_end + SUBQ $0x20, R11 + SUBQ $0x04, R9 + MOVL (R8)(R9*1), R12 + MOVQ R11, CX + SHLQ CL, R12 + ORQ R12, R10 + +bitReader_fillFast_2_end: + MOVQ DI, CX + MOVQ R10, R12 + SHRQ CL, R12 + MOVW (SI)(R12*2), CX + MOVB CH, AH + MOVBQZX CL, CX + ADDQ CX, R11 + SHLQ CL, R10 + MOVQ DI, CX + MOVQ R10, R12 + SHRQ CL, R12 + MOVW (SI)(R12*2), CX + MOVB CH, AL + MOVBQZX CL, CX + ADDQ CX, R11 + SHLQ CL, R10 + BSWAPL AX + + // Store the decoded values + MOVL AX, (DX) + ADDQ $0x04, DX + +loop_condition: + CMPQ R9, $0x08 + JGE main_loop + + // Update ctx structure + MOVQ ctx+0(FP), AX + SUBQ 16(AX), DX + MOVQ DX, 40(AX) + MOVQ (AX), AX + MOVQ R9, 24(AX) + MOVQ R10, 32(AX) + MOVB R11, 40(AX) + RET + + // Report error +error_max_decoded_size_exceeded: + MOVQ ctx+0(FP), AX + MOVQ $-1, CX + MOVQ CX, 40(AX) + RET + +// func decompress1x_main_loop_bmi2(ctx *decompress1xContext) +// Requires: BMI2 +TEXT ·decompress1x_main_loop_bmi2(SB), $0-8 + MOVQ ctx+0(FP), CX + MOVQ 16(CX), DX + MOVQ 24(CX), BX + CMPQ BX, $0x04 + JB error_max_decoded_size_exceeded + LEAQ (DX)(BX*1), BX + MOVQ (CX), SI + MOVQ (SI), R8 + MOVQ 24(SI), R9 + MOVQ 32(SI), R10 + MOVBQZX 40(SI), R11 + MOVQ 32(CX), SI + MOVBQZX 8(CX), DI + JMP loop_condition + +main_loop: + // Check if we have room for 4 bytes in the output buffer + LEAQ 4(DX), CX + CMPQ CX, BX + JGE error_max_decoded_size_exceeded + + // Decode 4 values + CMPQ R11, $0x20 + JL bitReader_fillFast_1_end + SUBQ $0x20, R11 + SUBQ $0x04, R9 + MOVL (R8)(R9*1), CX + SHLXQ R11, CX, CX + ORQ CX, R10 + +bitReader_fillFast_1_end: + SHRXQ DI, R10, CX + MOVW (SI)(CX*2), CX + MOVB CH, AL + MOVBQZX CL, CX + ADDQ CX, R11 + SHLXQ CX, R10, R10 + SHRXQ DI, R10, CX + MOVW (SI)(CX*2), CX + MOVB CH, AH + MOVBQZX CL, CX + ADDQ CX, R11 + SHLXQ CX, R10, R10 + BSWAPL AX + CMPQ R11, $0x20 + JL bitReader_fillFast_2_end + SUBQ $0x20, R11 + SUBQ $0x04, R9 + MOVL (R8)(R9*1), CX + SHLXQ R11, CX, CX + ORQ CX, R10 + +bitReader_fillFast_2_end: + SHRXQ DI, R10, CX + MOVW (SI)(CX*2), CX + MOVB CH, AH + MOVBQZX CL, CX + ADDQ CX, R11 + SHLXQ CX, R10, R10 + SHRXQ DI, R10, CX + MOVW (SI)(CX*2), CX + MOVB CH, AL + MOVBQZX CL, CX + ADDQ CX, R11 + SHLXQ CX, R10, R10 + BSWAPL AX + + // Store the decoded values + MOVL AX, (DX) + ADDQ $0x04, DX + +loop_condition: + CMPQ R9, $0x08 + JGE main_loop + + // Update ctx structure + MOVQ ctx+0(FP), AX + SUBQ 16(AX), DX + MOVQ DX, 40(AX) + MOVQ (AX), AX + MOVQ R9, 24(AX) + MOVQ R10, 32(AX) + MOVB R11, 40(AX) + RET + + // Report error +error_max_decoded_size_exceeded: + MOVQ ctx+0(FP), AX + MOVQ $-1, CX + MOVQ CX, 40(AX) + RET diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go new file mode 100644 index 00000000..908c17de --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go @@ -0,0 +1,299 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// This file contains a generic implementation of Decoder.Decompress4X. +package huff0 + +import ( + "errors" + "fmt" +) + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if len(src) < 6+(4*1) { + return nil, errors.New("input too small") + } + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress4X8bit(dst, src) + } + + var br [4]bitReaderShifted + // Decode "jump table" + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + buf := d.buffer() + var off uint8 + var decoded int + + // Decode 2 values from each decoder/loop. + const bufoff = 256 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break + } + + { + const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) + v := single[val&tlMask] + v2 := single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off] = uint8(v.entry >> 8) + buf[stream2][off] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + v2 = single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off+1] = uint8(v.entry >> 8) + buf[stream2][off+1] = uint8(v2.entry >> 8) + } + + { + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) + v := single[val&tlMask] + v2 := single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off] = uint8(v.entry >> 8) + buf[stream2][off] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + v2 = single[val2&tlMask] + br[stream].advance(uint8(v.entry)) + br[stream2].advance(uint8(v2.entry)) + buf[stream][off+1] = uint8(v.entry >> 8) + buf[stream2][off+1] = uint8(v2.entry >> 8) + } + + off += 2 + + if off == 0 { + if bufoff > dstEvery { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 1") + } + // There must at least be 3 buffers left. + if len(out)-bufoff < dstEvery*3 { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 2") + } + //copy(out, buf[0][:]) + //copy(out[dstEvery:], buf[1][:]) + //copy(out[dstEvery*2:], buf[2][:]) + //copy(out[dstEvery*3:], buf[3][:]) + *(*[bufoff]byte)(out) = buf[0] + *(*[bufoff]byte)(out[dstEvery:]) = buf[1] + *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2] + *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3] + out = out[bufoff:] + decoded += bufoff * 4 + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[0][:off]) + copy(out[dstEvery:], buf[1][:off]) + copy(out[dstEvery*2:], buf[2][:off]) + copy(out[dstEvery*3:], buf[3][:off]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. + remainBytes := dstEvery - (decoded / 4) + for i := range br { + offset := dstEvery * i + endsAt := offset + remainBytes + if endsAt > len(out) { + endsAt = len(out) + } + br := &br[i] + bitsLeft := br.remaining() + for bitsLeft > 0 { + br.fill() + if offset >= endsAt { + d.bufs.Put(buf) + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + if offset != endsAt { + d.bufs.Put(buf) + return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + d.bufs.Put(buf) + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress1X will decompress a 1X encoded stream. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress1X8Bit(dst, src) + } + var br bitReaderShifted + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + dt := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + bufs := d.buffer() + buf := &bufs[0] + var off uint8 + + for br.off >= 8 { + br.fillFast() + v := dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + // Refill + br.fillFast() + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + d.bufs.Put(bufs) + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) + + // br < 8, so uint8 is fine + bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead + for bitsLeft > 0 { + br.fill() + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } + if len(dst) >= maxDecodedSize { + d.bufs.Put(bufs) + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= nBits + dst = append(dst, uint8(v.entry>>8)) + } + d.bufs.Put(bufs) + return dst, br.close() +} diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index 3ee00ecb..e8ad17ad 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -8,6 +8,7 @@ import ( "fmt" "math" "math/bits" + "sync" "github.com/klauspost/compress/fse" ) @@ -116,6 +117,7 @@ type Scratch struct { nodes []nodeElt tmpOut [4][]byte fse *fse.Scratch + decPool sync.Pool // *[4][256]byte buffers. huffWeight [maxSymbolValue + 1]byte } diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go new file mode 100644 index 00000000..3954c512 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go @@ -0,0 +1,34 @@ +// Package cpuinfo gives runtime info about the current CPU. +// +// This is a very limited module meant for use internally +// in this project. For more versatile solution check +// https://github.com/klauspost/cpuid. +package cpuinfo + +// HasBMI1 checks whether an x86 CPU supports the BMI1 extension. +func HasBMI1() bool { + return hasBMI1 +} + +// HasBMI2 checks whether an x86 CPU supports the BMI2 extension. +func HasBMI2() bool { + return hasBMI2 +} + +// DisableBMI2 will disable BMI2, for testing purposes. +// Call returned function to restore previous state. +func DisableBMI2() func() { + old := hasBMI2 + hasBMI2 = false + return func() { + hasBMI2 = old + } +} + +// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions. +func HasBMI() bool { + return HasBMI1() && HasBMI2() +} + +var hasBMI1 bool +var hasBMI2 bool diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go new file mode 100644 index 00000000..e802579c --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go @@ -0,0 +1,11 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package cpuinfo + +// go:noescape +func x86extensions() (bmi1, bmi2 bool) + +func init() { + hasBMI1, hasBMI2 = x86extensions() +} diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s new file mode 100644 index 00000000..4465fbe9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s @@ -0,0 +1,36 @@ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" +#include "funcdata.h" +#include "go_asm.h" + +TEXT ·x86extensions(SB), NOSPLIT, $0 + // 1. determine max EAX value + XORQ AX, AX + CPUID + + CMPQ AX, $7 + JB unsupported + + // 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction" + MOVQ $7, AX + MOVQ $0, CX + CPUID + + BTQ $3, BX // bit 3 = BMI1 + SETCS AL + + BTQ $8, BX // bit 8 = BMI2 + SETCS AH + + MOVB AL, bmi1+0(FP) + MOVB AH, bmi2+1(FP) + RET + +unsupported: + XORQ AX, AX + MOVB AL, bmi1+0(FP) + MOVB AL, bmi2+1(FP) + RET diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go index 511bba65..2aa6a95a 100644 --- a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go @@ -18,6 +18,7 @@ func load64(b []byte, i int) uint64 { // emitLiteral writes a literal chunk and returns the number of bytes written. // // It assumes that: +// // dst is long enough to hold the encoded bytes // 1 <= len(lit) && len(lit) <= 65536 func emitLiteral(dst, lit []byte) int { @@ -42,6 +43,7 @@ func emitLiteral(dst, lit []byte) int { // emitCopy writes a copy chunk and returns the number of bytes written. // // It assumes that: +// // dst is long enough to hold the encoded bytes // 1 <= offset && offset <= 65535 // 4 <= length && length <= 65535 @@ -85,28 +87,40 @@ func emitCopy(dst []byte, offset, length int) int { return i + 2 } -// extendMatch returns the largest k such that k <= len(src) and that -// src[i:i+k-j] and src[j:k] have the same contents. -// -// It assumes that: -// 0 <= i && i < j && j <= len(src) -func extendMatch(src []byte, i, j int) int { - for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { - } - return j -} - func hash(u, shift uint32) uint32 { return (u * 0x1e35a7bd) >> shift } +// EncodeBlockInto exposes encodeBlock but checks dst size. +func EncodeBlockInto(dst, src []byte) (d int) { + if MaxEncodedLen(len(src)) > len(dst) { + return 0 + } + + // encodeBlock breaks on too big blocks, so split. + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return d +} + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. // // It also assumes that: +// // len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize func encodeBlock(dst, src []byte) (d int) { // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. // The table element type is uint16, as s < sLimit and sLimit < len(src) diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index c8f0f16f..bdd49c8b 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -12,6 +12,8 @@ The `zstd` package is provided as open source software using a Go standard licen Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors. +For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go). + ## Installation Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`. @@ -78,6 +80,9 @@ of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is in the future. So if you want to limit concurrency for future updates, specify the concurrency you would like. +If you would like stream encoding to be done without spawning async goroutines, use `WithEncoderConcurrency(1)` +which will compress input as each block is completed, blocking on writes until each has completed. + You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined compression settings can be specified. @@ -104,7 +109,8 @@ and seems to ignore concatenated streams, even though [it is part of the spec](h For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`. `EncodeAll` will encode all input in src and append it to dst. -This function can be called concurrently, but each call will only run on a single goroutine. +This function can be called concurrently. +Each call will only run on a same goroutine as the caller. Encoded blocks can be concatenated and the result will be the combined input stream. Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`. @@ -149,10 +155,10 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip This package: file out level insize outsize millis mb/s -silesia.tar zskp 1 211947520 73101992 643 313.87 -silesia.tar zskp 2 211947520 67504318 969 208.38 -silesia.tar zskp 3 211947520 64595893 2007 100.68 -silesia.tar zskp 4 211947520 60995370 8825 22.90 +silesia.tar zskp 1 211947520 73821326 634 318.47 +silesia.tar zskp 2 211947520 67655404 1508 133.96 +silesia.tar zskp 3 211947520 64746933 3000 67.37 +silesia.tar zskp 4 211947520 60073508 16926 11.94 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 @@ -161,94 +167,94 @@ silesia.tar zstd 6 211947520 62916450 1913 105.66 silesia.tar zstd 9 211947520 60212393 5063 39.92 gzip, stdlib/this package: -silesia.tar gzstd 1 211947520 80007735 1654 122.21 -silesia.tar gzkp 1 211947520 80136201 1152 175.45 +silesia.tar gzstd 1 211947520 80007735 1498 134.87 +silesia.tar gzkp 1 211947520 80088272 1009 200.31 GOB stream of binary data. Highly compressible. https://files.klauspost.com/compress/gob-stream.7z file out level insize outsize millis mb/s -gob-stream zskp 1 1911399616 235022249 3088 590.30 -gob-stream zskp 2 1911399616 205669791 3786 481.34 -gob-stream zskp 3 1911399616 175034659 9636 189.17 -gob-stream zskp 4 1911399616 165609838 50369 36.19 +gob-stream zskp 1 1911399616 233948096 3230 564.34 +gob-stream zskp 2 1911399616 203997694 4997 364.73 +gob-stream zskp 3 1911399616 173526523 13435 135.68 +gob-stream zskp 4 1911399616 162195235 47559 38.33 gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 gob-stream zstd 9 1911399616 177620386 16175 112.70 -gob-stream gzstd 1 1911399616 357382641 10251 177.82 -gob-stream gzkp 1 1911399616 359753026 5438 335.20 +gob-stream gzstd 1 1911399616 357382013 9046 201.49 +gob-stream gzkp 1 1911399616 359136669 4885 373.08 The test data for the Large Text Compression Benchmark is the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. http://mattmahoney.net/dc/textdata.html file out level insize outsize millis mb/s -enwik9 zskp 1 1000000000 343848582 3609 264.18 -enwik9 zskp 2 1000000000 317276632 5746 165.97 -enwik9 zskp 3 1000000000 292243069 12162 78.41 -enwik9 zskp 4 1000000000 262183768 82837 11.51 +enwik9 zskp 1 1000000000 343833605 3687 258.64 +enwik9 zskp 2 1000000000 317001237 7672 124.29 +enwik9 zskp 3 1000000000 291915823 15923 59.89 +enwik9 zskp 4 1000000000 261710291 77697 12.27 enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 enwik9 zstd 9 1000000000 278348700 28549 33.40 -enwik9 gzstd 1 1000000000 382578136 9604 99.30 -enwik9 gzkp 1 1000000000 383825945 6544 145.73 +enwik9 gzstd 1 1000000000 382578136 8608 110.78 +enwik9 gzkp 1 1000000000 382781160 5628 169.45 Highly compressible JSON file. https://files.klauspost.com/compress/github-june-2days-2019.json.zst file out level insize outsize millis mb/s -github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 -github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 -github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75 -github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16 +github-june-2days-2019.json zskp 1 6273951764 697439532 9789 611.17 +github-june-2days-2019.json zskp 2 6273951764 610876538 18553 322.49 +github-june-2days-2019.json zskp 3 6273951764 517662858 44186 135.41 +github-june-2days-2019.json zskp 4 6273951764 464617114 165373 36.18 github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16 -github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79 -github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61 +github-june-2days-2019.json gzstd 1 6273951764 1164397768 26793 223.32 +github-june-2days-2019.json gzkp 1 6273951764 1120631856 17693 338.16 VM Image, Linux mint with a few installed applications: https://files.klauspost.com/compress/rawstudio-mint14.7z file out level insize outsize millis mb/s -rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 -rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 -rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08 -rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52 +rawstudio-mint14.tar zskp 1 8558382592 3718400221 18206 448.29 +rawstudio-mint14.tar zskp 2 8558382592 3326118337 37074 220.15 +rawstudio-mint14.tar zskp 3 8558382592 3163842361 87306 93.49 +rawstudio-mint14.tar zskp 4 8558382592 2970480650 783862 10.41 rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91 -rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40 -rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92 +rawstudio-mint14.tar gzstd 1 8558382592 3926234992 51345 158.96 +rawstudio-mint14.tar gzkp 1 8558382592 3960117298 36722 222.26 CSV data: https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst file out level insize outsize millis mb/s -nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 -nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 -nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66 -nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33 +nyc-taxi-data-10M.csv zskp 1 3325605752 641319332 9462 335.17 +nyc-taxi-data-10M.csv zskp 2 3325605752 588976126 17570 180.50 +nyc-taxi-data-10M.csv zskp 3 3325605752 529329260 32432 97.79 +nyc-taxi-data-10M.csv zskp 4 3325605752 474949772 138025 22.98 nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12 -nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83 -nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00 +nyc-taxi-data-10M.csv gzstd 1 3325605752 928654908 21270 149.11 +nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68 ``` ## Decompressor @@ -283,8 +289,13 @@ func Decompress(in io.Reader, out io.Writer) error { } ``` -It is important to use the "Close" function when you no longer need the Reader to stop running goroutines. -See "Allocation-less operation" below. +It is important to use the "Close" function when you no longer need the Reader to stop running goroutines, +when running with default settings. +Goroutines will exit once an error has been returned, including `io.EOF` at the end of a stream. + +Streams are decoded concurrently in 4 asynchronous stages to give the best possible throughput. +However, if you prefer synchronous decompression, use `WithDecoderConcurrency(1)` which will decompress data +as it is being requested only. For decoding buffers, it could look something like this: @@ -293,7 +304,7 @@ import "github.com/klauspost/compress/zstd" // Create a reader that caches decompressors. // For this operation type we supply a nil Reader. -var decoder, _ = zstd.NewReader(nil) +var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0)) // Decompress a buffer. We don't supply a destination buffer, // so it will be allocated by the decoder. @@ -303,9 +314,12 @@ func Decompress(src []byte) ([]byte, error) { ``` Both of these cases should provide the functionality needed. -The decoder can be used for *concurrent* decompression of multiple buffers. +The decoder can be used for *concurrent* decompression of multiple buffers. +By default 4 decompressors will be created. + It will only allow a certain number of concurrent operations to run. -To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. +To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. +It is possible to use `WithDecoderConcurrency(0)` to create GOMAXPROCS decoders. ### Dictionaries @@ -357,62 +371,48 @@ In this case no unneeded allocations should be made. The buffer decoder does everything on the same goroutine and does nothing concurrently. It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that. -The stream decoder operates on +The stream decoder will create goroutines that: -* One goroutine reads input and splits the input to several block decoders. -* A number of decoders will decode blocks. -* A goroutine coordinates these blocks and sends history from one to the next. +1) Reads input and splits the input into blocks. +2) Decompression of literals. +3) Decompression of sequences. +4) Reconstruction of output stream. So effectively this also means the decoder will "read ahead" and prepare data to always be available for output. +The concurrency level will, for streams, determine how many blocks ahead the compression will start. + Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency. -In practice this means that concurrency is often limited to utilizing about 2 cores effectively. - - +In practice this means that concurrency is often limited to utilizing about 3 cores effectively. + ### Benchmarks -These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd). - The first two are streaming decodes and the last are smaller inputs. - + +Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used. + ``` -BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op -BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op - -BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op -BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op - -Concurrent performance: - -BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op - -BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op +BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op +BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op + +Concurrent blocks, performance: + +BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op ``` -This reflects the performance around May 2020, but this may be out of date. +This reflects the performance around May 2022, but this may be out of date. ## Zstd inside ZIP files diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 85445853..97299d49 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -7,6 +7,7 @@ package zstd import ( "encoding/binary" "errors" + "fmt" "io" "math/bits" ) @@ -50,16 +51,16 @@ func (b *bitReader) getBits(n uint8) int { if n == 0 /*|| b.bitsRead >= 64 */ { return 0 } - return b.getBitsFast(n) + return int(b.get32BitsFast(n)) } -// getBitsFast requires that at least one bit is requested every time. +// get32BitsFast requires that at least one bit is requested every time. // There are no checks if the buffer is filled. -func (b *bitReader) getBitsFast(n uint8) int { +func (b *bitReader) get32BitsFast(n uint8) uint32 { const regMask = 64 - 1 v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) b.bitsRead += n - return int(v) + return v } // fillFast() will make sure at least 32 bits are available. @@ -125,6 +126,9 @@ func (b *bitReader) remain() uint { func (b *bitReader) close() error { // Release reference. b.in = nil + if !b.finished() { + return fmt.Errorf("%d extra bits on block, should be 0", b.remain()) + } if b.bitsRead > 64 { return io.ErrUnexpectedEOF } diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go index 303ae90f..78b3c61b 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -5,8 +5,6 @@ package zstd -import "fmt" - // bitWriter will write bits. // First bit will be LSB of the first byte of output. type bitWriter struct { @@ -38,7 +36,7 @@ func (b *bitWriter) addBits16NC(value uint16, bits uint8) { b.nBits += bits } -// addBits32NC will add up to 32 bits. +// addBits32NC will add up to 31 bits. // It will not check if there is space for them, // so the caller must ensure that it has flushed recently. func (b *bitWriter) addBits32NC(value uint32, bits uint8) { @@ -46,6 +44,26 @@ func (b *bitWriter) addBits32NC(value uint32, bits uint8) { b.nBits += bits } +// addBits64NC will add up to 64 bits. +// There must be space for 32 bits. +func (b *bitWriter) addBits64NC(value uint64, bits uint8) { + if bits <= 31 { + b.addBits32Clean(uint32(value), bits) + return + } + b.addBits32Clean(uint32(value), 32) + b.flush32() + b.addBits32Clean(uint32(value>>32), bits-32) +} + +// addBits32Clean will add up to 32 bits. +// It will not check if there is space for them. +// The input must not contain more bits than specified. +func (b *bitWriter) addBits32Clean(value uint32, bits uint8) { + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + // addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. // It will not check if there is space for them, so the caller must ensure that it has flushed recently. func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { @@ -53,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { b.nBits += bits } -// flush will flush all pending full bytes. -// There will be at least 56 bits available for writing when this has been called. -// Using flush32 is faster, but leaves less space for writing. -func (b *bitWriter) flush() { - v := b.nBits >> 3 - switch v { - case 0: - case 1: - b.out = append(b.out, - byte(b.bitContainer), - ) - case 2: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - ) - case 3: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - ) - case 4: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - ) - case 5: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - ) - case 6: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - ) - case 7: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - ) - case 8: - b.out = append(b.out, - byte(b.bitContainer), - byte(b.bitContainer>>8), - byte(b.bitContainer>>16), - byte(b.bitContainer>>24), - byte(b.bitContainer>>32), - byte(b.bitContainer>>40), - byte(b.bitContainer>>48), - byte(b.bitContainer>>56), - ) - default: - panic(fmt.Errorf("bits (%d) > 64", b.nBits)) - } - b.bitContainer >>= v << 3 - b.nBits &= 7 -} - // flush32 will flush out, so there are at least 32 bits available for writing. func (b *bitWriter) flush32() { if b.nBits < 32 { diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 8a98c456..9f17ce60 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -5,9 +5,14 @@ package zstd import ( + "bytes" + "encoding/binary" "errors" "fmt" + "hash/crc32" "io" + "os" + "path/filepath" "sync" "github.com/klauspost/compress/huff0" @@ -38,14 +43,14 @@ const ( // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) maxCompressedBlockSize = 128 << 10 + compressedBlockOverAlloc = 16 + maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc + // Maximum possible block size (all Raw+Uncompressed). maxBlockSize = (1 << 21) - 1 - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header - maxCompressedLiteralSize = 1 << 18 - maxRLELiteralSize = 1 << 20 - maxMatchLen = 131074 - maxSequences = 0x7f00 + 0xffff + maxMatchLen = 131074 + maxSequences = 0x7f00 + 0xffff // We support slightly less than the reference decoder to be able to // use ints on 32 bit archs. @@ -76,20 +81,28 @@ type blockDec struct { // Window size of the block. WindowSize uint64 - history chan *history - input chan struct{} - result chan decodeOutput - sequenceBuf []seq - err error - decWG sync.WaitGroup + err error + + // Check against this crc, if hasCRC is true. + checkCRC uint32 + hasCRC bool // Frame to use for singlethreaded decoding. // Should not be used by the decoder itself since parent may be another frame. localFrame *frameDec + sequence []seqVals + + async struct { + newHist *history + literals []byte + seqData []byte + seqSize int // Size of uncompressed sequences + fcs uint64 + } + // Block is RLE, this is the size. RLESize uint32 - tmp [4]byte Type blockType @@ -109,13 +122,8 @@ func (b *blockDec) String() string { func newBlockDec(lowMem bool) *blockDec { b := blockDec{ - lowMem: lowMem, - result: make(chan decodeOutput, 1), - input: make(chan struct{}, 1), - history: make(chan *history, 1), + lowMem: lowMem, } - b.decWG.Add(1) - go b.startDecoder() return &b } @@ -133,11 +141,17 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.Type = blockType((bh >> 1) & 3) // find size. cSize := int(bh >> 3) - maxSize := maxBlockSize + maxSize := maxCompressedBlockSizeAlloc switch b.Type { case blockTypeReserved: return ErrReservedBlockType case blockTypeRLE: + if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { + if debugDecoder { + printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) + } + return ErrWindowSizeExceeded + } b.RLESize = uint32(cSize) if b.lowMem { maxSize = cSize @@ -148,9 +162,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { println("Data size on stream:", cSize) } b.RLESize = 0 - maxSize = maxCompressedBlockSize + maxSize = maxCompressedBlockSizeAlloc if windowSize < maxCompressedBlockSize && b.lowMem { - maxSize = int(windowSize) + maxSize = int(windowSize) + compressedBlockOverAlloc } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { if debugDecoder { @@ -158,7 +172,19 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } return ErrCompressedSizeTooBig } + // Empty compressed blocks must at least be 2 bytes + // for Literals_Block_Type and one for Sequences_Section_Header. + if cSize < 2 { + return ErrBlockTooSmall + } case blockTypeRaw: + if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) { + if debugDecoder { + printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b) + } + return ErrWindowSizeExceeded + } + b.RLESize = 0 // We do not need a destination for raw blocks. maxSize = -1 @@ -167,16 +193,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } // Read block data. - if cap(b.dataStorage) < cSize { + if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize { + // byteBuf doesn't need a destination buffer. if b.lowMem || cSize > maxCompressedBlockSize { - b.dataStorage = make([]byte, 0, cSize) + b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) } else { - b.dataStorage = make([]byte, 0, maxCompressedBlockSize) + b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) } } - if cap(b.dst) <= maxSize { - b.dst = make([]byte, 0, maxSize+1) - } b.data, err = br.readBig(cSize, b.dataStorage) if err != nil { if debugDecoder { @@ -185,6 +209,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } return err } + if cap(b.dst) <= maxSize { + b.dst = make([]byte, 0, maxSize+1) + } return nil } @@ -193,85 +220,14 @@ func (b *blockDec) sendErr(err error) { b.Last = true b.Type = blockTypeReserved b.err = err - b.input <- struct{}{} } // Close will release resources. // Closed blockDec cannot be reset. func (b *blockDec) Close() { - close(b.input) - close(b.history) - close(b.result) - b.decWG.Wait() } -// decodeAsync will prepare decoding the block when it receives input. -// This will separate output and history. -func (b *blockDec) startDecoder() { - defer b.decWG.Done() - for range b.input { - //println("blockDec: Got block input") - switch b.Type { - case blockTypeRLE: - if cap(b.dst) < int(b.RLESize) { - if b.lowMem { - b.dst = make([]byte, b.RLESize) - } else { - b.dst = make([]byte, maxBlockSize) - } - } - o := decodeOutput{ - d: b, - b: b.dst[:b.RLESize], - err: nil, - } - v := b.data[0] - for i := range o.b { - o.b[i] = v - } - hist := <-b.history - hist.append(o.b) - b.result <- o - case blockTypeRaw: - o := decodeOutput{ - d: b, - b: b.data, - err: nil, - } - hist := <-b.history - hist.append(o.b) - b.result <- o - case blockTypeCompressed: - b.dst = b.dst[:0] - err := b.decodeCompressed(nil) - o := decodeOutput{ - d: b, - b: b.dst, - err: err, - } - if debugDecoder { - println("Decompressed to", len(b.dst), "bytes, error:", err) - } - b.result <- o - case blockTypeReserved: - // Used for returning errors. - <-b.history - b.result <- decodeOutput{ - d: b, - b: nil, - err: b.err, - } - default: - panic("Invalid block type") - } - if debugDecoder { - println("blockDec: Finished block") - } - } -} - -// decodeAsync will prepare decoding the block when it receives the history. -// If history is provided, it will not fetch it from the channel. +// decodeBuf func (b *blockDec) decodeBuf(hist *history) error { switch b.Type { case blockTypeRLE: @@ -279,7 +235,7 @@ func (b *blockDec) decodeBuf(hist *history) error { if b.lowMem { b.dst = make([]byte, b.RLESize) } else { - b.dst = make([]byte, maxBlockSize) + b.dst = make([]byte, maxCompressedBlockSize) } } b.dst = b.dst[:b.RLESize] @@ -294,14 +250,23 @@ func (b *blockDec) decodeBuf(hist *history) error { return nil case blockTypeCompressed: saved := b.dst - b.dst = hist.b - hist.b = nil + // Append directly to history + if hist.ignoreBuffer == 0 { + b.dst = hist.b + hist.b = nil + } else { + b.dst = b.dst[:0] + } err := b.decodeCompressed(hist) if debugDecoder { println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err) } - hist.b = b.dst - b.dst = saved + if hist.ignoreBuffer == 0 { + hist.b = b.dst + b.dst = saved + } else { + hist.appendKeep(b.dst) + } return err case blockTypeReserved: // Used for returning errors. @@ -311,30 +276,18 @@ func (b *blockDec) decodeBuf(hist *history) error { } } -// decodeCompressed will start decompressing a block. -// If no history is supplied the decoder will decodeAsync as much as possible -// before fetching from blockDec.history -func (b *blockDec) decodeCompressed(hist *history) error { - in := b.data - delayedHistory := hist == nil - - if delayedHistory { - // We must always grab history. - defer func() { - if hist == nil { - <-b.history - } - }() - } +func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) { // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header if len(in) < 2 { - return ErrBlockTooSmall + return in, ErrBlockTooSmall } + litType := literalsBlockType(in[0] & 3) var litRegenSize int var litCompSize int sizeFormat := (in[0] >> 2) & 3 var fourStreams bool + var literals []byte switch litType { case literalsBlockRaw, literalsBlockRLE: switch sizeFormat { @@ -350,7 +303,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes. if len(in) < 3 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12) in = in[3:] @@ -361,7 +314,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023). if len(in) < 3 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) litRegenSize = int(n & 1023) @@ -372,7 +325,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { fourStreams = true if len(in) < 4 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) litRegenSize = int(n & 16383) @@ -382,7 +335,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { fourStreams = true if len(in) < 5 { println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28) litRegenSize = int(n & 262143) @@ -393,13 +346,15 @@ func (b *blockDec) decodeCompressed(hist *history) error { if debugDecoder { println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams) } - var literals []byte - var huff *huff0.Scratch + if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize { + return in, ErrWindowSizeExceeded + } + switch litType { case literalsBlockRaw: if len(in) < litRegenSize { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } literals = in[:litRegenSize] in = in[litRegenSize:] @@ -407,19 +362,13 @@ func (b *blockDec) decodeCompressed(hist *history) error { case literalsBlockRLE: if len(in) < 1 { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } if cap(b.literalBuf) < litRegenSize { if b.lowMem { - b.literalBuf = make([]byte, litRegenSize) + b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc) } else { - if litRegenSize > maxCompressedLiteralSize { - // Exceptional - b.literalBuf = make([]byte, litRegenSize) - } else { - b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize) - - } + b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc) } } literals = b.literalBuf[:litRegenSize] @@ -434,7 +383,7 @@ func (b *blockDec) decodeCompressed(hist *history) error { case literalsBlockTreeless: if len(in) < litCompSize { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } // Store compressed literals, so we defer decoding until we get history. literals = in[:litCompSize] @@ -442,31 +391,68 @@ func (b *blockDec) decodeCompressed(hist *history) error { if debugDecoder { printf("Found %d compressed literals\n", litCompSize) } + huff := hist.huffTree + if huff == nil { + return in, errors.New("literal block was treeless, but no history was defined") + } + // Ensure we have space to store it. + if cap(b.literalBuf) < litRegenSize { + if b.lowMem { + b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) + } else { + b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) + } + } + var err error + // Use our out buffer. + huff.MaxDecodedSize = litRegenSize + if fourStreams { + literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) + } else { + literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) + } + // Make sure we don't leak our literals buffer + if err != nil { + println("decompressing literals:", err) + return in, err + } + if len(literals) != litRegenSize { + return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) + } + case literalsBlockCompressed: if len(in) < litCompSize { println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) - return ErrBlockTooSmall + return in, ErrBlockTooSmall } literals = in[:litCompSize] in = in[litCompSize:] - huff = huffDecoderPool.Get().(*huff0.Scratch) - var err error // Ensure we have space to store it. if cap(b.literalBuf) < litRegenSize { if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize) + b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc) } else { - b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) + b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc) } } - if huff == nil { - huff = &huff0.Scratch{} + huff := hist.huffTree + if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) { + huff = huffDecoderPool.Get().(*huff0.Scratch) + if huff == nil { + huff = &huff0.Scratch{} + } + } + var err error + if debugDecoder { + println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals)) } huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) - return err + return in, err } + hist.huffTree = huff + huff.MaxDecodedSize = litRegenSize // Use our out buffer. if fourStreams { literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) @@ -475,27 +461,63 @@ func (b *blockDec) decodeCompressed(hist *history) error { } if err != nil { println("decoding compressed literals:", err) - return err + return in, err } // Make sure we don't leak our literals buffer if len(literals) != litRegenSize { - return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) + return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) } + // Re-cap to get extra size. + literals = b.literalBuf[:len(literals)] if debugDecoder { printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) } } + hist.decoders.literals = literals + return in, nil +} + +// decodeCompressed will start decompressing a block. +func (b *blockDec) decodeCompressed(hist *history) error { + in := b.data + in, err := b.decodeLiterals(in, hist) + if err != nil { + return err + } + err = b.prepareSequences(in, hist) + if err != nil { + return err + } + if hist.decoders.nSeqs == 0 { + b.dst = append(b.dst, hist.decoders.literals...) + return nil + } + before := len(hist.decoders.out) + err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:]) + if err != nil { + return err + } + if hist.decoders.maxSyncLen > 0 { + hist.decoders.maxSyncLen += uint64(before) + hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out)) + } + b.dst = hist.decoders.out + hist.recentOffsets = hist.decoders.prevOffset + return nil +} +func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { + if debugDecoder { + printf("prepareSequences: %d byte(s) input\n", len(in)) + } // Decode Sequences // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section if len(in) < 1 { return ErrBlockTooSmall } + var nSeqs int seqHeader := in[0] - nSeqs := 0 switch { - case seqHeader == 0: - in = in[1:] case seqHeader < 128: nSeqs = int(seqHeader) in = in[1:] @@ -512,19 +534,16 @@ func (b *blockDec) decodeCompressed(hist *history) error { nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8) in = in[3:] } - // Allocate sequences - if cap(b.sequenceBuf) < nSeqs { - if b.lowMem { - b.sequenceBuf = make([]seq, nSeqs) - } else { - // Allocate max - b.sequenceBuf = make([]seq, nSeqs, maxSequences) + if nSeqs == 0 && len(in) != 0 { + // When no sequences, there should not be any more data... + if debugDecoder { + printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in)) } - } else { - // Reuse buffer - b.sequenceBuf = b.sequenceBuf[:nSeqs] + return ErrUnexpectedBlockSize } - var seqs = &sequenceDecs{} + + var seqs = &hist.decoders + seqs.nSeqs = nSeqs if nSeqs > 0 { if len(in) < 1 { return ErrBlockTooSmall @@ -553,6 +572,9 @@ func (b *blockDec) decodeCompressed(hist *history) error { } switch mode { case compModePredefined: + if seq.fse != nil && !seq.fse.preDefined { + fseDecoderPool.Put(seq.fse) + } seq.fse = &fsePredef[i] case compModeRLE: if br.remain() < 1 { @@ -560,34 +582,36 @@ func (b *blockDec) decodeCompressed(hist *history) error { } v := br.Uint8() br.advance(1) - dec := fseDecoderPool.Get().(*fseDecoder) + if seq.fse == nil || seq.fse.preDefined { + seq.fse = fseDecoderPool.Get().(*fseDecoder) + } symb, err := decSymbolValue(v, symbolTableX[i]) if err != nil { printf("RLE Transform table (%v) error: %v", tableIndex(i), err) return err } - dec.setRLE(symb) - seq.fse = dec + seq.fse.setRLE(symb) if debugDecoder { - printf("RLE set to %+v, code: %v", symb, v) + printf("RLE set to 0x%x, code: %v", symb, v) } case compModeFSE: println("Reading table for", tableIndex(i)) - dec := fseDecoderPool.Get().(*fseDecoder) - err := dec.readNCount(&br, uint16(maxTableSymbol[i])) + if seq.fse == nil || seq.fse.preDefined { + seq.fse = fseDecoderPool.Get().(*fseDecoder) + } + err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i])) if err != nil { println("Read table error:", err) return err } - err = dec.transform(symbolTableX[i]) + err = seq.fse.transform(symbolTableX[i]) if err != nil { println("Transform table error:", err) return err } if debugDecoder { - println("Read table ok", "symbolLen:", dec.symbolLen) + println("Read table ok", "symbolLen:", seq.fse.symbolLen) } - seq.fse = dec case compModeRepeat: seq.repeat = true } @@ -597,140 +621,106 @@ func (b *blockDec) decodeCompressed(hist *history) error { } in = br.unread() } - - // Wait for history. - // All time spent after this is critical since it is strictly sequential. - if hist == nil { - hist = <-b.history - if hist.error { - return ErrDecoderClosed - } - } - - // Decode treeless literal block. - if litType == literalsBlockTreeless { - // TODO: We could send the history early WITHOUT the stream history. - // This would allow decoding treeless literals before the byte history is available. - // Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless. - // So not much obvious gain here. - - if hist.huffTree == nil { - return errors.New("literal block was treeless, but no history was defined") - } - // Ensure we have space to store it. - if cap(b.literalBuf) < litRegenSize { - if b.lowMem { - b.literalBuf = make([]byte, 0, litRegenSize) - } else { - b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) - } - } - var err error - // Use our out buffer. - huff = hist.huffTree - if fourStreams { - literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals) - } else { - literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals) - } - // Make sure we don't leak our literals buffer - if err != nil { - println("decompressing literals:", err) - return err - } - if len(literals) != litRegenSize { - return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) - } - } else { - if hist.huffTree != nil && huff != nil { - if hist.dict == nil || hist.dict.litEnc != hist.huffTree { - huffDecoderPool.Put(hist.huffTree) - } - hist.huffTree = nil - } - } - if huff != nil { - hist.huffTree = huff - } if debugDecoder { - println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.") + println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.") } if nSeqs == 0 { - // Decompressed content is defined entirely as Literals Section content. - b.dst = append(b.dst, literals...) - if delayedHistory { - hist.append(literals) + if len(b.sequence) > 0 { + b.sequence = b.sequence[:0] } return nil } + br := seqs.br + if br == nil { + br = &bitReader{} + } + if err := br.init(in); err != nil { + return err + } - seqs, err := seqs.mergeHistory(&hist.decoders) - if err != nil { + if err := seqs.initialize(br, hist, b.dst); err != nil { + println("initializing sequences:", err) return err } - if debugDecoder { - println("History merged ok") + // Extract blocks... + if false && hist.dict == nil { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize) + var buf bytes.Buffer + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse)) + buf.Write(in) + os.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm) } - br := &bitReader{} - if err := br.init(in); err != nil { - return err + + return nil +} + +func (b *blockDec) decodeSequences(hist *history) error { + if cap(b.sequence) < hist.decoders.nSeqs { + if b.lowMem { + b.sequence = make([]seqVals, 0, hist.decoders.nSeqs) + } else { + b.sequence = make([]seqVals, 0, 0x7F00+0xffff) + } + } + b.sequence = b.sequence[:hist.decoders.nSeqs] + if hist.decoders.nSeqs == 0 { + hist.decoders.seqSize = len(hist.decoders.literals) + return nil } + hist.decoders.windowSize = hist.windowSize + hist.decoders.prevOffset = hist.recentOffsets - // TODO: Investigate if sending history without decoders are faster. - // This would allow the sequences to be decoded async and only have to construct stream history. - // If only recent offsets were not transferred, this would be an obvious win. - // Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded. + err := hist.decoders.decode(b.sequence) + hist.recentOffsets = hist.decoders.prevOffset + return err +} +func (b *blockDec) executeSequences(hist *history) error { hbytes := hist.b if len(hbytes) > hist.windowSize { hbytes = hbytes[len(hbytes)-hist.windowSize:] - // We do not need history any more. + // We do not need history anymore. if hist.dict != nil { hist.dict.content = nil } } - - if err := seqs.initialize(br, hist, literals, b.dst); err != nil { - println("initializing sequences:", err) - return err - } - - err = seqs.decode(nSeqs, br, hbytes) + hist.decoders.windowSize = hist.windowSize + hist.decoders.out = b.dst[:0] + err := hist.decoders.execute(b.sequence, hbytes) if err != nil { return err } - if !br.finished() { - return fmt.Errorf("%d extra bits on block, should be 0", br.remain()) - } + return b.updateHistory(hist) +} - err = br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - } +func (b *blockDec) updateHistory(hist *history) error { if len(b.data) > maxCompressedBlockSize { return fmt.Errorf("compressed block size too large (%d)", len(b.data)) } // Set output and release references. - b.dst = seqs.out - seqs.out, seqs.literals, seqs.hist = nil, nil, nil + b.dst = hist.decoders.out + hist.recentOffsets = hist.decoders.prevOffset - if !delayedHistory { - // If we don't have delayed history, no need to update. - hist.recentOffsets = seqs.prevOffset - return nil - } if b.Last { // if last block we don't care about history. println("Last block, no history returned") hist.b = hist.b[:0] return nil + } else { + hist.append(b.dst) + if debugDecoder { + println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b)) + } } - hist.append(b.dst) - hist.recentOffsets = seqs.prevOffset - if debugDecoder { - println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.") - } + hist.decoders.out, hist.decoders.literals = nil, nil return nil } diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 3df185ee..fd4a36f7 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -51,7 +51,7 @@ func (b *blockEnc) init() { if cap(b.literals) < maxCompressedBlockSize { b.literals = make([]byte, 0, maxCompressedBlockSize) } - const defSeqs = 200 + const defSeqs = 2000 if cap(b.sequences) < defSeqs { b.sequences = make([]seq, 0, defSeqs) } @@ -426,7 +426,7 @@ func fuzzFseEncoder(data []byte) int { return 0 } enc := fseEncoder{} - hist := enc.Histogram()[:256] + hist := enc.Histogram() maxSym := uint8(0) for i, v := range data { v = v & 63 @@ -473,7 +473,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { return b.encodeLits(b.literals, rawAllLits) } // We want some difference to at least account for the headers. - saved := b.size - len(b.literals) - (b.size >> 5) + saved := b.size - len(b.literals) - (b.size >> 6) if saved < 16 { if org == nil { return errIncompressible @@ -722,52 +722,53 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB) } seq-- - if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 { - // No need to flush (common) - for seq >= 0 { - s = b.sequences[seq] - wr.flush32() - llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] - // tabelog max is 8 for all. - of.encode(ofB) - ml.encode(mlB) - ll.encode(llB) - wr.flush32() - - // We checked that all can stay within 32 bits - wr.addBits32NC(s.litLen, llB.outBits) - wr.addBits32NC(s.matchLen, mlB.outBits) - wr.addBits32NC(s.offset, ofB.outBits) - - if debugSequences { - println("Encoded seq", seq, s) - } - - seq-- - } - } else { - for seq >= 0 { - s = b.sequences[seq] - wr.flush32() - llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] - // tabelog max is below 8 for each. - of.encode(ofB) - ml.encode(mlB) - ll.encode(llB) - wr.flush32() - - // ml+ll = max 32 bits total - wr.addBits32NC(s.litLen, llB.outBits) - wr.addBits32NC(s.matchLen, mlB.outBits) - wr.flush32() - wr.addBits32NC(s.offset, ofB.outBits) - - if debugSequences { - println("Encoded seq", seq, s) - } - - seq-- - } + // Store sequences in reverse... + for seq >= 0 { + s = b.sequences[seq] + + ofB := ofTT[s.ofCode] + wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits. + //of.encode(ofB) + nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16 + dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState) + wr.addBits16NC(of.state, uint8(nbBitsOut)) + of.state = of.stateTable[dstState] + + // Accumulate extra bits. + outBits := ofB.outBits & 31 + extraBits := uint64(s.offset & bitMask32[outBits]) + extraBitsN := outBits + + mlB := mlTT[s.mlCode] + //ml.encode(mlB) + nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16 + dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState) + wr.addBits16NC(ml.state, uint8(nbBitsOut)) + ml.state = ml.stateTable[dstState] + + outBits = mlB.outBits & 31 + extraBits = extraBits<> 16 + dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState) + wr.addBits16NC(ll.state, uint8(nbBitsOut)) + ll.state = ll.stateTable[dstState] + + outBits = llB.outBits & 31 + extraBits = extraBits<= b.size { - // Maybe even add a bigger margin. + // Discard and encode as raw block. + b.output = b.encodeRawTo(b.output[:bhOffset], org) + b.popOffsets() b.litEnc.Reuse = huff0.ReusePolicyNone - return errIncompressible + return nil } // Size is output minus block header. @@ -801,14 +805,13 @@ func (b *blockEnc) genCodes() { // nothing to do return } - if len(b.sequences) > math.MaxUint16 { panic("can only encode up to 64K sequences") } // No bounds checks after here: - llH := b.coders.llEnc.Histogram()[:256] - ofH := b.coders.ofEnc.Histogram()[:256] - mlH := b.coders.mlEnc.Histogram()[:256] + llH := b.coders.llEnc.Histogram() + ofH := b.coders.ofEnc.Histogram() + mlH := b.coders.mlEnc.Histogram() for i := range llH { llH[i] = 0 } @@ -820,7 +823,8 @@ func (b *blockEnc) genCodes() { } var llMax, ofMax, mlMax uint8 - for i, seq := range b.sequences { + for i := range b.sequences { + seq := &b.sequences[i] v := llCode(seq.litLen) seq.llCode = v llH[v]++ @@ -844,7 +848,6 @@ func (b *blockEnc) genCodes() { panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) } } - b.sequences[i] = seq } maxCount := func(a []uint32) int { var max uint32 diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index aab71c6c..55a38855 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -7,7 +7,6 @@ package zstd import ( "fmt" "io" - "io/ioutil" ) type byteBuffer interface { @@ -23,7 +22,7 @@ type byteBuffer interface { readByte() (byte, error) // Skip n bytes. - skipN(n int) error + skipN(n int64) error } // in-memory buffer @@ -52,23 +51,22 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { return r, nil } -func (b *byteBuf) remain() []byte { - return *b -} - func (b *byteBuf) readByte() (byte, error) { bb := *b if len(bb) < 1 { - return 0, nil + return 0, io.ErrUnexpectedEOF } r := bb[0] *b = bb[1:] return r, nil } -func (b *byteBuf) skipN(n int) error { +func (b *byteBuf) skipN(n int64) error { bb := *b - if len(bb) < n { + if n < 0 { + return fmt.Errorf("negative skip (%d) requested", n) + } + if int64(len(bb)) < n { return io.ErrUnexpectedEOF } *b = bb[n:] @@ -111,8 +109,11 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { } func (r *readerWrapper) readByte() (byte, error) { - n2, err := r.r.Read(r.tmp[:1]) + n2, err := io.ReadFull(r.r, r.tmp[:1]) if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } return 0, err } if n2 != 1 { @@ -121,9 +122,9 @@ func (r *readerWrapper) readByte() (byte, error) { return r.tmp[0], nil } -func (r *readerWrapper) skipN(n int) error { - n2, err := io.CopyN(ioutil.Discard, r.r, int64(n)) - if n2 != int64(n) { +func (r *readerWrapper) skipN(n int64) error { + n2, err := io.CopyN(io.Discard, r.r, n) + if n2 != n { err = io.ErrUnexpectedEOF } return err diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go index 2c4fca17..0e59a242 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytereader.go +++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go @@ -13,12 +13,6 @@ type byteReader struct { off int } -// init will initialize the reader and set the input. -func (b *byteReader) init(in []byte) { - b.b = in - b.off = 0 -} - // advance the stream b n bytes. func (b *byteReader) advance(n uint) { b.off += int(n) diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index 69736e8d..f6a24097 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -4,7 +4,7 @@ package zstd import ( - "bytes" + "encoding/binary" "errors" "io" ) @@ -15,18 +15,50 @@ const HeaderMaxSize = 14 + 3 // Header contains information about the first frame and block within that. type Header struct { - // Window Size the window of data to keep while decoding. - // Will only be set if HasFCS is false. - WindowSize uint64 + // SingleSegment specifies whether the data is to be decompressed into a + // single contiguous memory segment. + // It implies that WindowSize is invalid and that FrameContentSize is valid. + SingleSegment bool - // Frame content size. - // Expected size of the entire frame. - FrameContentSize uint64 + // WindowSize is the window of data to keep while decoding. + // Will only be set if SingleSegment is false. + WindowSize uint64 // Dictionary ID. // If 0, no dictionary. DictionaryID uint32 + // HasFCS specifies whether FrameContentSize has a valid value. + HasFCS bool + + // FrameContentSize is the expected uncompressed size of the entire frame. + FrameContentSize uint64 + + // Skippable will be true if the frame is meant to be skipped. + // This implies that FirstBlock.OK is false. + Skippable bool + + // SkippableID is the user-specific ID for the skippable frame. + // Valid values are between 0 to 15, inclusive. + SkippableID int + + // SkippableSize is the length of the user data to skip following + // the header. + SkippableSize uint32 + + // HeaderSize is the raw size of the frame header. + // + // For normal frames, it includes the size of the magic number and + // the size of the header (per section 3.1.1.1). + // It does not include the size for any data blocks (section 3.1.1.2) nor + // the size for the trailing content checksum. + // + // For skippable frames, this counts the size of the magic number + // along with the size of the size field of the payload. + // It does not include the size of the skippable payload itself. + // The total frame size is the HeaderSize plus the SkippableSize. + HeaderSize int + // First block information. FirstBlock struct { // OK will be set if first block could be decoded. @@ -51,17 +83,9 @@ type Header struct { CompressedSize int } - // Skippable will be true if the frame is meant to be skipped. - // No other information will be populated. - Skippable bool - // If set there is a checksum present for the block content. + // The checksum field at the end is always 4 bytes long. HasCheckSum bool - - // If this is true FrameContentSize will have a valid value - HasFCS bool - - SingleSegment bool } // Decode the header from the beginning of the stream. @@ -71,39 +95,46 @@ type Header struct { // If there isn't enough input, io.ErrUnexpectedEOF is returned. // The FirstBlock.OK will indicate if enough information was available to decode the first block header. func (h *Header) Decode(in []byte) error { + *h = Header{} if len(in) < 4 { return io.ErrUnexpectedEOF } + h.HeaderSize += 4 b, in := in[:4], in[4:] - if !bytes.Equal(b, frameMagic) { - if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { + if string(b) != frameMagic { + if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { return ErrMagicMismatch } - *h = Header{Skippable: true} + if len(in) < 4 { + return io.ErrUnexpectedEOF + } + h.HeaderSize += 4 + h.Skippable = true + h.SkippableID = int(b[0] & 0xf) + h.SkippableSize = binary.LittleEndian.Uint32(in) return nil } + + // Read Window_Descriptor + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor if len(in) < 1 { return io.ErrUnexpectedEOF } - - // Clear output - *h = Header{} fhd, in := in[0], in[1:] + h.HeaderSize++ h.SingleSegment = fhd&(1<<5) != 0 h.HasCheckSum = fhd&(1<<2) != 0 - if fhd&(1<<3) != 0 { return errors.New("reserved bit set on frame header") } - // Read Window_Descriptor - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor if !h.SingleSegment { if len(in) < 1 { return io.ErrUnexpectedEOF } var wd byte wd, in = in[0], in[1:] + h.HeaderSize++ windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) @@ -120,10 +151,8 @@ func (h *Header) Decode(in []byte) error { return io.ErrUnexpectedEOF } b, in = in[:size], in[size:] - if b == nil { - return io.ErrUnexpectedEOF - } - switch size { + h.HeaderSize += int(size) + switch len(b) { case 1: h.DictionaryID = uint32(b[0]) case 2: @@ -152,10 +181,8 @@ func (h *Header) Decode(in []byte) error { return io.ErrUnexpectedEOF } b, in = in[:fcsSize], in[fcsSize:] - if b == nil { - return io.ErrUnexpectedEOF - } - switch fcsSize { + h.HeaderSize += int(fcsSize) + switch len(b) { case 1: h.FrameContentSize = uint64(b[0]) case 2: diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index f430f58b..f04aaa21 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -5,9 +5,12 @@ package zstd import ( - "errors" + "context" + "encoding/binary" "io" "sync" + + "github.com/klauspost/compress/zstd/internal/xxhash" ) // Decoder provides decoding of zstandard streams. @@ -22,15 +25,22 @@ type Decoder struct { // Unreferenced decoders, ready for use. decoders chan *blockDec - // Streams ready to be decoded. - stream chan decodeStream - // Current read position used for Reader functionality. current decoderState + // sync stream decoding + syncStream struct { + decodedFrame uint64 + br readerWrapper + enabled bool + inFrame bool + dstBuf []byte + } + + frame *frameDec + // Custom dictionaries. - // Always uses copies. - dicts map[uint32]dict + dicts map[uint32]*dict // streamWg is the waitgroup for all streams streamWg sync.WaitGroup @@ -46,7 +56,10 @@ type decoderState struct { output chan decodeOutput // cancel remaining output. - cancel chan struct{} + cancel context.CancelFunc + + // crc of current frame + crc *xxhash.Digest flushed bool } @@ -81,7 +94,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { return nil, err } } - d.current.output = make(chan decodeOutput, d.o.concurrent) + d.current.crc = xxhash.New() d.current.flushed = true if r == nil { @@ -89,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { } // Transfer option dicts. - d.dicts = make(map[uint32]dict, len(d.o.dicts)) + d.dicts = make(map[uint32]*dict, len(d.o.dicts)) for _, dc := range d.o.dicts { d.dicts[dc.id] = dc } @@ -130,7 +143,7 @@ func (d *Decoder) Read(p []byte) (int, error) { break } if !d.nextBlock(n == 0) { - return n, nil + return n, d.current.err } } } @@ -162,6 +175,7 @@ func (d *Decoder) Reset(r io.Reader) error { d.drainOutput() + d.syncStream.br.r = nil if r == nil { d.current.err = ErrDecoderNilInput if len(d.current.b) > 0 { @@ -172,21 +186,23 @@ func (d *Decoder) Reset(r io.Reader) error { } // If bytes buffer and < 5MB, do sync decoding anyway. - if bb, ok := r.(byter); ok && bb.Len() < 5<<20 { + if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap { bb2 := bb if debugDecoder { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } b := bb2.Bytes() var dst []byte - if cap(d.current.b) > 0 { - dst = d.current.b + if cap(d.syncStream.dstBuf) > 0 { + dst = d.syncStream.dstBuf[:0] } - dst, err := d.DecodeAll(b, dst[:0]) + dst, err := d.DecodeAll(b, dst) if err == nil { err = io.EOF } + // Save output buffer + d.syncStream.dstBuf = dst d.current.b = dst d.current.err = err d.current.flushed = true @@ -195,33 +211,40 @@ func (d *Decoder) Reset(r io.Reader) error { } return nil } - - if d.stream == nil { - d.stream = make(chan decodeStream, 1) - d.streamWg.Add(1) - go d.startStreamDecoder(d.stream) - } - // Remove current block. + d.stashDecoder() d.current.decodeOutput = decodeOutput{} d.current.err = nil - d.current.cancel = make(chan struct{}) d.current.flushed = false d.current.d = nil + d.syncStream.dstBuf = nil - d.stream <- decodeStream{ - r: r, - output: d.current.output, - cancel: d.current.cancel, + // Ensure no-one else is still running... + d.streamWg.Wait() + if d.frame == nil { + d.frame = newFrameDec(d.o) } + + if d.o.concurrent == 1 { + return d.startSyncDecoder(r) + } + + d.current.output = make(chan decodeOutput, d.o.concurrent) + ctx, cancel := context.WithCancel(context.Background()) + d.current.cancel = cancel + d.streamWg.Add(1) + go d.startStreamDecoder(ctx, r, d.current.output) + return nil } // drainOutput will drain the output until errEndOfStream is sent. func (d *Decoder) drainOutput() { if d.current.cancel != nil { - println("cancelling current") - close(d.current.cancel) + if debugDecoder { + println("cancelling current") + } + d.current.cancel() d.current.cancel = nil } if d.current.d != nil { @@ -243,12 +266,9 @@ func (d *Decoder) drainOutput() { } d.decoders <- v.d } - if v.err == errEndOfStream { - println("current flushed") - d.current.flushed = true - return - } } + d.current.output = nil + d.current.flushed = true } // WriteTo writes data to w until there's no more data to write or when an error occurs. @@ -287,19 +307,23 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) { // DecodeAll can be used concurrently. // The Decoder concurrency limits will be respected. func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { - if d.current.err == ErrDecoderClosed { + if d.decoders == nil { return dst, ErrDecoderClosed } // Grab a block decoder and frame decoder. block := <-d.decoders frame := block.localFrame + initialSize := len(dst) defer func() { if debugDecoder { printf("re-adding decoder: %p", block) } frame.rawInput = nil frame.bBuf = nil + if frame.history.decoders.br != nil { + frame.history.decoders.br.in = nil + } d.decoders <- block }() frame.bBuf = input @@ -307,34 +331,45 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { for { frame.history.reset() err := frame.reset(&frame.bBuf) - if err == io.EOF { - if debugDecoder { - println("frame reset return EOF") - } - return dst, nil - } - if frame.DictionaryID != nil { - dict, ok := d.dicts[*frame.DictionaryID] - if !ok { - return nil, ErrUnknownDictionary - } - frame.history.setDict(&dict) - } if err != nil { + if err == io.EOF { + if debugDecoder { + println("frame reset return EOF") + } + return dst, nil + } return dst, err } - if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { - return dst, ErrDecoderSizeExceeded + if err = d.setDict(frame); err != nil { + return nil, err } - if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 { - // Never preallocate moe than 1 GB up front. + if frame.WindowSize > d.o.maxWindowSize { + if debugDecoder { + println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize) + } + return dst, ErrWindowSizeExceeded + } + if frame.FrameContentSize != fcsUnknown { + if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) { + if debugDecoder { + println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst)) + } + return dst, ErrDecoderSizeExceeded + } + if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) { + if debugDecoder { + println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst)) + } + return dst, ErrDecoderSizeExceeded + } if cap(dst)-len(dst) < int(frame.FrameContentSize) { - dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)) + dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc) copy(dst2, dst) dst = dst2 } } - if cap(dst) == 0 { + + if cap(dst) == 0 && !d.o.limitToCap { // Allocate len(input) * 2 by default if nothing is provided // and we didn't get frame content size. size := len(input) * 2 @@ -352,6 +387,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { if err != nil { return dst, err } + if uint64(len(dst)-initialSize) > d.o.maxDecodedSize { + return dst, ErrDecoderSizeExceeded + } if len(frame.bBuf) == 0 { if debugDecoder { println("frame dbuf empty") @@ -368,33 +406,167 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { // If non-blocking mode is used the returned boolean will be false // if no data was available without blocking. func (d *Decoder) nextBlock(blocking bool) (ok bool) { - if d.current.d != nil { - if debugDecoder { - printf("re-adding current decoder %p", d.current.d) - } - d.decoders <- d.current.d - d.current.d = nil - } if d.current.err != nil { // Keep error state. - return blocking + return false } + d.current.b = d.current.b[:0] + // SYNC: + if d.syncStream.enabled { + if !blocking { + return false + } + ok = d.nextBlockSync() + if !ok { + d.stashDecoder() + } + return ok + } + + //ASYNC: + d.stashDecoder() if blocking { - d.current.decodeOutput = <-d.current.output + d.current.decodeOutput, ok = <-d.current.output } else { select { - case d.current.decodeOutput = <-d.current.output: + case d.current.decodeOutput, ok = <-d.current.output: default: return false } } + if !ok { + // This should not happen, so signal error state... + d.current.err = io.ErrUnexpectedEOF + return false + } + next := d.current.decodeOutput + if next.d != nil && next.d.async.newHist != nil { + d.current.crc.Reset() + } if debugDecoder { - println("got", len(d.current.b), "bytes, error:", d.current.err) + var tmp [4]byte + binary.LittleEndian.PutUint32(tmp[:], uint32(xxhash.Sum64(next.b))) + println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp) + } + + if d.o.ignoreChecksum { + return true + } + + if len(next.b) > 0 { + d.current.crc.Write(next.b) + } + if next.err == nil && next.d != nil && next.d.hasCRC { + got := uint32(d.current.crc.Sum64()) + if got != next.d.checkCRC { + if debugDecoder { + printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC) + } + d.current.err = ErrCRCMismatch + } else { + if debugDecoder { + printf("CRC ok %08x\n", got) + } + } + } + + return true +} + +func (d *Decoder) nextBlockSync() (ok bool) { + if d.current.d == nil { + d.current.d = <-d.decoders + } + for len(d.current.b) == 0 { + if !d.syncStream.inFrame { + d.frame.history.reset() + d.current.err = d.frame.reset(&d.syncStream.br) + if d.current.err == nil { + d.current.err = d.setDict(d.frame) + } + if d.current.err != nil { + return false + } + if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize { + d.current.err = ErrDecoderSizeExceeded + return false + } + + d.syncStream.decodedFrame = 0 + d.syncStream.inFrame = true + } + d.current.err = d.frame.next(d.current.d) + if d.current.err != nil { + return false + } + d.frame.history.ensureBlock() + if debugDecoder { + println("History trimmed:", len(d.frame.history.b), "decoded already:", d.syncStream.decodedFrame) + } + histBefore := len(d.frame.history.b) + d.current.err = d.current.d.decodeBuf(&d.frame.history) + + if d.current.err != nil { + println("error after:", d.current.err) + return false + } + d.current.b = d.frame.history.b[histBefore:] + if debugDecoder { + println("history after:", len(d.frame.history.b)) + } + + // Check frame size (before CRC) + d.syncStream.decodedFrame += uint64(len(d.current.b)) + if d.syncStream.decodedFrame > d.frame.FrameContentSize { + if debugDecoder { + printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize) + } + d.current.err = ErrFrameSizeExceeded + return false + } + + // Check FCS + if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize { + if debugDecoder { + printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize) + } + d.current.err = ErrFrameSizeMismatch + return false + } + + // Update/Check CRC + if d.frame.HasCheckSum { + if !d.o.ignoreChecksum { + d.frame.crc.Write(d.current.b) + } + if d.current.d.Last { + if !d.o.ignoreChecksum { + d.current.err = d.frame.checkCRC() + } else { + d.current.err = d.frame.consumeCRC() + } + if d.current.err != nil { + println("CRC error:", d.current.err) + return false + } + } + } + d.syncStream.inFrame = !d.current.d.Last } return true } +func (d *Decoder) stashDecoder() { + if d.current.d != nil { + if debugDecoder { + printf("re-adding current decoder %p", d.current.d) + } + d.decoders <- d.current.d + d.current.d = nil + } +} + // Close will release all resources. // It is NOT possible to reuse the decoder after this. func (d *Decoder) Close() { @@ -402,10 +574,10 @@ func (d *Decoder) Close() { return } d.drainOutput() - if d.stream != nil { - close(d.stream) + if d.current.cancel != nil { + d.current.cancel() d.streamWg.Wait() - d.stream = nil + d.current.cancel = nil } if d.decoders != nil { close(d.decoders) @@ -456,100 +628,321 @@ type decodeOutput struct { err error } -type decodeStream struct { - r io.Reader - - // Blocks ready to be written to output. - output chan decodeOutput - - // cancel reading from the input - cancel chan struct{} +func (d *Decoder) startSyncDecoder(r io.Reader) error { + d.frame.history.reset() + d.syncStream.br = readerWrapper{r: r} + d.syncStream.inFrame = false + d.syncStream.enabled = true + d.syncStream.decodedFrame = 0 + return nil } -// errEndOfStream indicates that everything from the stream was read. -var errEndOfStream = errors.New("end-of-stream") - // Create Decoder: -// Spawn n block decoders. These accept tasks to decode a block. -// Create goroutine that handles stream processing, this will send history to decoders as they are available. -// Decoders update the history as they decode. -// When a block is returned: -// a) history is sent to the next decoder, -// b) content written to CRC. -// c) return data to WRITER. -// d) wait for next block to return data. -// Once WRITTEN, the decoders reused by the writer frame decoder for re-use. -func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { +// ASYNC: +// Spawn 3 go routines. +// 0: Read frames and decode block literals. +// 1: Decode sequences. +// 2: Execute sequences, send to output. +func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) { defer d.streamWg.Done() - frame := newFrameDec(d.o) - for stream := range inStream { - if debugDecoder { - println("got new stream") + br := readerWrapper{r: r} + + var seqDecode = make(chan *blockDec, d.o.concurrent) + var seqExecute = make(chan *blockDec, d.o.concurrent) + + // Async 1: Decode sequences... + go func() { + var hist history + var hasErr bool + + for block := range seqDecode { + if hasErr { + if block != nil { + seqExecute <- block + } + continue + } + if block.async.newHist != nil { + if debugDecoder { + println("Async 1: new history, recent:", block.async.newHist.recentOffsets) + } + hist.reset() + hist.decoders = block.async.newHist.decoders + hist.recentOffsets = block.async.newHist.recentOffsets + hist.windowSize = block.async.newHist.windowSize + if block.async.newHist.dict != nil { + hist.setDict(block.async.newHist.dict) + } + } + if block.err != nil || block.Type != blockTypeCompressed { + hasErr = block.err != nil + seqExecute <- block + continue + } + + hist.decoders.literals = block.async.literals + block.err = block.prepareSequences(block.async.seqData, &hist) + if debugDecoder && block.err != nil { + println("prepareSequences returned:", block.err) + } + hasErr = block.err != nil + if block.err == nil { + block.err = block.decodeSequences(&hist) + if debugDecoder && block.err != nil { + println("decodeSequences returned:", block.err) + } + hasErr = block.err != nil + // block.async.sequence = hist.decoders.seq[:hist.decoders.nSeqs] + block.async.seqSize = hist.decoders.seqSize + } + seqExecute <- block } - br := readerWrapper{r: stream.r} - decodeStream: - for { - frame.history.reset() - err := frame.reset(&br) - if debugDecoder && err != nil { - println("Frame decoder returned", err) + close(seqExecute) + hist.reset() + }() + + var wg sync.WaitGroup + wg.Add(1) + + // Async 3: Execute sequences... + frameHistCache := d.frame.history.b + go func() { + var hist history + var decodedFrame uint64 + var fcs uint64 + var hasErr bool + for block := range seqExecute { + out := decodeOutput{err: block.err, d: block} + if block.err != nil || hasErr { + hasErr = true + output <- out + continue + } + if block.async.newHist != nil { + if debugDecoder { + println("Async 2: new history") + } + hist.reset() + hist.windowSize = block.async.newHist.windowSize + hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer + if block.async.newHist.dict != nil { + hist.setDict(block.async.newHist.dict) + } + + if cap(hist.b) < hist.allocFrameBuffer { + if cap(frameHistCache) >= hist.allocFrameBuffer { + hist.b = frameHistCache + } else { + hist.b = make([]byte, 0, hist.allocFrameBuffer) + println("Alloc history sized", hist.allocFrameBuffer) + } + } + hist.b = hist.b[:0] + fcs = block.async.fcs + decodedFrame = 0 } - if err == nil && frame.DictionaryID != nil { - dict, ok := d.dicts[*frame.DictionaryID] - if !ok { - err = ErrUnknownDictionary + do := decodeOutput{err: block.err, d: block} + switch block.Type { + case blockTypeRLE: + if debugDecoder { + println("add rle block length:", block.RLESize) + } + + if cap(block.dst) < int(block.RLESize) { + if block.lowMem { + block.dst = make([]byte, block.RLESize) + } else { + block.dst = make([]byte, maxCompressedBlockSize) + } + } + block.dst = block.dst[:block.RLESize] + v := block.data[0] + for i := range block.dst { + block.dst[i] = v + } + hist.append(block.dst) + do.b = block.dst + case blockTypeRaw: + if debugDecoder { + println("add raw block length:", len(block.data)) + } + hist.append(block.data) + do.b = block.data + case blockTypeCompressed: + if debugDecoder { + println("execute with history length:", len(hist.b), "window:", hist.windowSize) + } + hist.decoders.seqSize = block.async.seqSize + hist.decoders.literals = block.async.literals + do.err = block.executeSequences(&hist) + hasErr = do.err != nil + if debugDecoder && hasErr { + println("executeSequences returned:", do.err) + } + do.b = block.dst + } + if !hasErr { + decodedFrame += uint64(len(do.b)) + if decodedFrame > fcs { + println("fcs exceeded", block.Last, fcs, decodedFrame) + do.err = ErrFrameSizeExceeded + hasErr = true + } else if block.Last && fcs != fcsUnknown && decodedFrame != fcs { + do.err = ErrFrameSizeMismatch + hasErr = true } else { - frame.history.setDict(&dict) + if debugDecoder { + println("fcs ok", block.Last, fcs, decodedFrame) + } } } - if err != nil { - stream.output <- decodeOutput{ - err: err, + output <- do + } + close(output) + frameHistCache = hist.b + wg.Done() + if debugDecoder { + println("decoder goroutines finished") + } + hist.reset() + }() + + var hist history +decodeStream: + for { + var hasErr bool + hist.reset() + decodeBlock := func(block *blockDec) { + if hasErr { + if block != nil { + seqDecode <- block } - break + return } + if block.err != nil || block.Type != blockTypeCompressed { + hasErr = block.err != nil + seqDecode <- block + return + } + + remain, err := block.decodeLiterals(block.data, &hist) + block.err = err + hasErr = block.err != nil + if err == nil { + block.async.literals = hist.decoders.literals + block.async.seqData = remain + } else if debugDecoder { + println("decodeLiterals error:", err) + } + seqDecode <- block + } + frame := d.frame + if debugDecoder { + println("New frame...") + } + var historySent bool + frame.history.reset() + err := frame.reset(&br) + if debugDecoder && err != nil { + println("Frame decoder returned", err) + } + if err == nil { + err = d.setDict(frame) + } + if err == nil && d.frame.WindowSize > d.o.maxWindowSize { if debugDecoder { - println("starting frame decoder") - } - - // This goroutine will forward history between frames. - frame.frameDone.Add(1) - frame.initAsync() - - go frame.startDecoder(stream.output) - decodeFrame: - // Go through all blocks of the frame. - for { - dec := <-d.decoders - select { - case <-stream.cancel: - if !frame.sendErr(dec, io.EOF) { - // To not let the decoder dangle, send it back. - stream.output <- decodeOutput{d: dec} - } - break decodeStream - default: + println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize) + } + + err = ErrDecoderSizeExceeded + } + if err != nil { + select { + case <-ctx.Done(): + case dec := <-d.decoders: + dec.sendErr(err) + decodeBlock(dec) + } + break decodeStream + } + + // Go through all blocks of the frame. + for { + var dec *blockDec + select { + case <-ctx.Done(): + break decodeStream + case dec = <-d.decoders: + // Once we have a decoder, we MUST return it. + } + err := frame.next(dec) + if !historySent { + h := frame.history + if debugDecoder { + println("Alloc History:", h.allocFrameBuffer) + } + hist.reset() + if h.dict != nil { + hist.setDict(h.dict) } - err := frame.next(dec) - switch err { - case io.EOF: - // End of current frame, no error - println("EOF on next block") - break decodeFrame - case nil: - continue - default: - println("block decoder returned", err) - break decodeStream + dec.async.newHist = &h + dec.async.fcs = frame.FrameContentSize + historySent = true + } else { + dec.async.newHist = nil + } + if debugDecoder && err != nil { + println("next block returned error:", err) + } + dec.err = err + dec.hasCRC = false + if dec.Last && frame.HasCheckSum && err == nil { + crc, err := frame.rawInput.readSmall(4) + if len(crc) < 4 { + if err == nil { + err = io.ErrUnexpectedEOF + + } + println("CRC missing?", err) + dec.err = err + } else { + dec.checkCRC = binary.LittleEndian.Uint32(crc) + dec.hasCRC = true + if debugDecoder { + printf("found crc to check: %08x\n", dec.checkCRC) + } } } - // All blocks have started decoding, check if there are more frames. - println("waiting for done") - frame.frameDone.Wait() - println("done waiting...") + err = dec.err + last := dec.Last + decodeBlock(dec) + if err != nil { + break decodeStream + } + if last { + break + } } - frame.frameDone.Wait() - println("Sending EOS") - stream.output <- decodeOutput{err: errEndOfStream} } + close(seqDecode) + wg.Wait() + hist.reset() + d.frame.history.b = frameHistCache +} + +func (d *Decoder) setDict(frame *frameDec) (err error) { + dict, ok := d.dicts[frame.DictionaryID] + if ok { + if debugDecoder { + println("setting dict", frame.DictionaryID) + } + frame.history.setDict(dict) + } else if frame.DictionaryID != 0 { + // A zero or missing dictionary id is ambiguous: + // either dictionary zero, or no dictionary. In particular, + // zstd --patch-from uses this id for the source file, + // so only return an error if the dictionary id is not zero. + err = ErrUnknownDictionary + } + return err } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 95cc9b8b..774c5f00 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -6,6 +6,8 @@ package zstd import ( "errors" + "fmt" + "math/bits" "runtime" ) @@ -14,21 +16,28 @@ type DOption func(*decoderOptions) error // options retains accumulated state of multiple options. type decoderOptions struct { - lowMem bool - concurrent int - maxDecodedSize uint64 - maxWindowSize uint64 - dicts []dict + lowMem bool + concurrent int + maxDecodedSize uint64 + maxWindowSize uint64 + dicts []*dict + ignoreChecksum bool + limitToCap bool + decodeBufsBelow int } func (o *decoderOptions) setDefault() { *o = decoderOptions{ // use less ram: true for now, but may change. - lowMem: true, - concurrent: runtime.GOMAXPROCS(0), - maxWindowSize: MaxWindowSize, + lowMem: true, + concurrent: runtime.GOMAXPROCS(0), + maxWindowSize: MaxWindowSize, + decodeBufsBelow: 128 << 10, } - o.maxDecodedSize = 1 << 63 + if o.concurrent > 4 { + o.concurrent = 4 + } + o.maxDecodedSize = 64 << 30 } // WithDecoderLowmem will set whether to use a lower amount of memory, @@ -37,16 +46,25 @@ func WithDecoderLowmem(b bool) DOption { return func(o *decoderOptions) error { o.lowMem = b; return nil } } -// WithDecoderConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. -// The value supplied must be at least 1. -// By default this will be set to GOMAXPROCS. +// WithDecoderConcurrency sets the number of created decoders. +// When decoding block with DecodeAll, this will limit the number +// of possible concurrently running decodes. +// When decoding streams, this will limit the number of +// inflight blocks. +// When decoding streams and setting maximum to 1, +// no async decoding will be done. +// When a value of 0 is provided GOMAXPROCS will be used. +// By default this will be set to 4 or GOMAXPROCS, whatever is lower. func WithDecoderConcurrency(n int) DOption { return func(o *decoderOptions) error { - if n <= 0 { + if n < 0 { return errors.New("concurrency must be at least 1") } - o.concurrent = n + if n == 0 { + o.concurrent = runtime.GOMAXPROCS(0) + } else { + o.concurrent = n + } return nil } } @@ -54,7 +72,7 @@ func WithDecoderConcurrency(n int) DOption { // WithDecoderMaxMemory allows to set a maximum decoded size for in-memory // non-streaming operations or maximum window size for streaming operations. // This can be used to control memory usage of potentially hostile content. -// Maximum and default is 1 << 63 bytes. +// Maximum is 1 << 63 bytes. Default is 64GiB. func WithDecoderMaxMemory(n uint64) DOption { return func(o *decoderOptions) error { if n == 0 { @@ -69,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption { } // WithDecoderDicts allows to register one or more dictionaries for the decoder. -// If several dictionaries with the same ID is provided the last one will be used. +// +// Each slice in dict must be in the [dictionary format] produced by +// "zstd --train" from the Zstandard reference implementation. +// +// If several dictionaries with the same ID are provided, the last one will be used. +// +// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format func WithDecoderDicts(dicts ...[]byte) DOption { return func(o *decoderOptions) error { for _, b := range dicts { @@ -77,8 +101,20 @@ func WithDecoderDicts(dicts ...[]byte) DOption { if err != nil { return err } - o.dicts = append(o.dicts, *d) + o.dicts = append(o.dicts, d) + } + return nil + } +} + +// WithDecoderDictRaw registers a dictionary that may be used by the decoder. +// The slice content can be arbitrary data. +func WithDecoderDictRaw(id uint32, content []byte) DOption { + return func(o *decoderOptions) error { + if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { + return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) } + o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}) return nil } } @@ -100,3 +136,34 @@ func WithDecoderMaxWindow(size uint64) DOption { return nil } } + +// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes, +// or any size set in WithDecoderMaxMemory. +// This can be used to limit decoding to a specific maximum output size. +// Disabled by default. +func WithDecodeAllCapLimit(b bool) DOption { + return func(o *decoderOptions) error { + o.limitToCap = b + return nil + } +} + +// WithDecodeBuffersBelow will fully decode readers that have a +// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer. +// This typically uses less allocations but will have the full decompressed object in memory. +// Note that DecodeAllCapLimit will disable this, as well as giving a size of 0 or less. +// Default is 128KiB. +func WithDecodeBuffersBelow(size int) DOption { + return func(o *decoderOptions) error { + o.decodeBufsBelow = size + return nil + } +} + +// IgnoreChecksum allows to forcibly ignore checksum checking. +func IgnoreChecksum(b bool) DOption { + return func(o *decoderOptions) error { + o.ignoreChecksum = b + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index a36ae83e..ca095145 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -1,7 +1,6 @@ package zstd import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -20,7 +19,10 @@ type dict struct { content []byte } -var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} +const dictMagic = "\x37\xa4\x30\xec" + +// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB. +const dictMaxLength = 1 << 31 // ID returns the dictionary id or 0 if d is nil. func (d *dict) ID() uint32 { @@ -30,14 +32,38 @@ func (d *dict) ID() uint32 { return d.id } -// DictContentSize returns the dictionary content size or 0 if d is nil. -func (d *dict) DictContentSize() int { +// ContentSize returns the dictionary content size or 0 if d is nil. +func (d *dict) ContentSize() int { if d == nil { return 0 } return len(d.content) } +// Content returns the dictionary content. +func (d *dict) Content() []byte { + if d == nil { + return nil + } + return d.content +} + +// Offsets returns the initial offsets. +func (d *dict) Offsets() [3]int { + if d == nil { + return [3]int{} + } + return d.offsets +} + +// LitEncoder returns the literal encoder. +func (d *dict) LitEncoder() *huff0.Scratch { + if d == nil { + return nil + } + return d.litEnc +} + // Load a dictionary as described in // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format func loadDict(b []byte) (*dict, error) { @@ -50,7 +76,7 @@ func loadDict(b []byte) (*dict, error) { ofDec: sequenceDec{fse: &fseDecoder{}}, mlDec: sequenceDec{fse: &fseDecoder{}}, } - if !bytes.Equal(b[:4], dictMagic[:]) { + if string(b[:4]) != dictMagic { return nil, ErrMagicMismatch } d.id = binary.LittleEndian.Uint32(b[4:8]) @@ -62,7 +88,7 @@ func loadDict(b []byte) (*dict, error) { var err error d.litEnc, b, err = huff0.ReadTable(b[8:], nil) if err != nil { - return nil, err + return nil, fmt.Errorf("loading literal table: %w", err) } d.litEnc.Reuse = huff0.ReusePolicyMust @@ -120,3 +146,16 @@ func loadDict(b []byte) (*dict, error) { return &d, nil } + +// InspectDictionary loads a zstd dictionary and provides functions to inspect the content. +func InspectDictionary(b []byte) (interface { + ID() uint32 + ContentSize() int + Content() []byte + Offsets() [3]int + LitEncoder() *huff0.Scratch +}, error) { + initPredefined() + d, err := loadDict(b) + return d, err +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index 295cd602..5ca46038 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -16,6 +16,7 @@ type fastBase struct { cur int32 // maximum offset. Should be at least 2x block size. maxMatchOff int32 + bufferReset int32 hist []byte crc *xxhash.Digest tmp [8]byte @@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc { } func (e *fastBase) addBlock(src []byte) int32 { - if debugAsserts && e.cur > bufferReset { - panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) + if debugAsserts && e.cur > e.bufferReset { + panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset)) } // check if we have space already if len(e.hist)+len(src) > cap(e.hist) { @@ -108,11 +109,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) { e.blk = enc } -func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { - // Extend the match to be as long as possible. - return int32(matchLen(src[s:], src[t:])) -} - func (e *fastBase) matchlen(s, t int32, src []byte) int32 { if debugAsserts { if s < 0 { @@ -131,8 +127,6 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 { panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) } } - - // Extend the match to be as long as possible. return int32(matchLen(src[s:], src[t:])) } @@ -150,18 +144,19 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) { } else { e.crc.Reset() } + e.blk.dictLitEnc = nil if d != nil { low := e.lowMem if singleBlock { e.lowMem = true } - e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) + e.ensureHist(d.ContentSize() + maxCompressedBlockSize) e.lowMem = low } // We offset current position so everything will be out of reach. // If above reset line, history will be purged. - if e.cur < bufferReset { + if e.cur < e.bufferReset { e.cur += e.maxMatchOff + int32(len(e.hist)) } e.hist = e.hist[:0] diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index 96028ecd..9819d414 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -34,7 +34,7 @@ type match struct { est int32 } -const highScore = 25000 +const highScore = maxMatchLen * 8 // estBits will estimate output bits from predefined tables. func (m *match) estBits(bitsPerByte int32) { @@ -84,14 +84,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = prevEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } + e.table = [bestShortTableSize]prevEntry{} + e.longTable = [bestLongTableSize]prevEntry{} e.cur = e.maxMatchOff break } @@ -163,7 +159,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { // nextEmit is where in src the next emitLiteral should start from. nextEmit := s - cv := load6432(src, s) // Relative offsets offset1 := int32(blk.recentOffsets[0]) @@ -177,7 +172,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { blk.literals = append(blk.literals, src[nextEmit:until]...) s.litLen = uint32(until - nextEmit) } - _ = addLiterals if debugEncoder { println("recent offsets:", blk.recentOffsets) @@ -192,49 +186,96 @@ encodeLoop: panic("offset0 was 0") } - bestOf := func(a, b match) match { - if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 { - return a - } - return b - } - const goodEnough = 100 + const goodEnough = 250 + + cv := load6432(src, s) nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] - matchAt := func(offset int32, s int32, first uint32, rep int32) match { + // Set m to a match at offset if it looks like that will improve compression. + improve := func(m *match, offset int32, s int32, first uint32, rep int32) { if s-offset >= e.maxMatchOff || load3232(src, offset) != first { - return match{s: s, est: highScore} + return } if debugAsserts { + if offset <= 0 { + panic(offset) + } if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) } } - m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} - m.estBits(bitsPerByte) - return m + // Try to quick reject if we already have a long match. + if m.length > 16 { + left := len(src) - int(m.s+m.length) + // If we are too close to the end, keep as is. + if left <= 0 { + return + } + checkLen := m.length - (s - m.s) - 8 + if left > 2 && checkLen > 4 { + // Check 4 bytes, 4 bytes from the end of the current match. + a := load3232(src, offset+checkLen) + b := load3232(src, s+checkLen) + if a != b { + return + } + } + } + l := 4 + e.matchlen(s+4, offset+4, src) + if rep < 0 { + // Extend candidate match backwards as far as possible. + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength { + s-- + offset-- + l++ + } + } + + cand := match{offset: offset, s: s, length: l, rep: rep} + cand.estBits(bitsPerByte) + if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { + *m = cand + } } - best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) + best := match{s: s, est: highScore} + improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1) if canRepeat && best.length < goodEnough { - cv32 := uint32(cv >> 8) - spp := s + 1 - best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) - best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) - best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) - if best.length > 0 { - cv32 = uint32(cv >> 24) - spp += 2 - best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) - best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) - best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) + if s == nextEmit { + // Check repeats straight after a match. + improve(&best, s-offset2, s, uint32(cv), 1|4) + improve(&best, s-offset3, s, uint32(cv), 2|4) + if offset1 > 1 { + improve(&best, s-(offset1-1), s, uint32(cv), 3|4) + } + } + + // If either no match or a non-repeat match, check at + 1 + if best.rep <= 0 { + cv32 := uint32(cv >> 8) + spp := s + 1 + improve(&best, spp-offset1, spp, cv32, 1) + improve(&best, spp-offset2, spp, cv32, 2) + improve(&best, spp-offset3, spp, cv32, 3) + if best.rep < 0 { + cv32 = uint32(cv >> 24) + spp += 2 + improve(&best, spp-offset1, spp, cv32, 1) + improve(&best, spp-offset2, spp, cv32, 2) + improve(&best, spp-offset3, spp, cv32, 3) + } } } // Load next and check... @@ -249,40 +290,45 @@ encodeLoop: if s >= sLimit { break encodeLoop } - cv = load6432(src, s) continue } - s++ candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)] - cv = load6432(src, s) - cv2 := load6432(src, s+1) + cv = load6432(src, s+1) + cv2 := load6432(src, s+2) candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)] candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] // Short at s+1 - best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1) // Long at s+1, s+2 - best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) - best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1) + improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1) + improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1) + improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1) if false { // Short at s+3. // Too often worse... - best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)) + improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1) } - // See if we can find a better match by checking where the current best ends. - // Use that offset to see if we can find a better full match. - if sAt := best.s + best.length; sAt < sLimit { - nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) - candidateEnd := e.longTable[nextHashL] - if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { - bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) - if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { - bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) + + // Start check at a fixed offset to allow for a few mismatches. + // For this compression level 2 yields the best results. + // We cannot do this if we have already indexed this position. + const skipBeginning = 2 + if best.s > s-skipBeginning { + // See if we can find a better match by checking where the current best ends. + // Use that offset to see if we can find a better full match. + if sAt := best.s + best.length; sAt < sLimit { + nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) + candidateEnd := e.longTable[nextHashL] + + if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 { + improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) + if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 { + improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) + } } - best = bestEnd } } } @@ -295,51 +341,34 @@ encodeLoop: // We have a match, we can store the forward value if best.rep > 0 { - s = best.s var seq seq seq.matchLen = uint32(best.length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := best.s - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - repIndex := best.offset - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ + if debugAsserts && s <= nextEmit { + panic("s <= nextEmit") } - addLiterals(&seq, start) + addLiterals(&seq, best.s) - // rep 0 - seq.offset = uint32(best.rep) + // Repeat. If bit 4 is set, this is a non-lit repeat. + seq.offset = uint32(best.rep & 3) if debugSequences { println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - // Index match start+1 (long) -> s - 1 - index0 := s + // Index old s + 1 -> s - 1 + index0 := s + 1 s = best.s + best.length nextEmit = s if s >= sLimit { if debugEncoder { println("repeat ended", s, best.length) - } break encodeLoop } // Index skipped... off := index0 + e.cur - for index0 < s-1 { + for index0 < s { cv0 := load6432(src, index0) h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen) @@ -349,17 +378,19 @@ encodeLoop: index0++ } switch best.rep { - case 2: + case 2, 4 | 1: offset1, offset2 = offset2, offset1 - case 3: + case 3, 4 | 2: offset1, offset2, offset3 = offset3, offset1, offset2 + case 4 | 3: + offset1, offset2, offset3 = offset1-1, offset1, offset2 } - cv = load6432(src, s) continue } // A 4-byte match has been found. Update recent offsets. // We'll later see if more than 4 bytes. + index0 := s + 1 s = best.s t := best.offset offset1, offset2, offset3 = s-t, offset1, offset2 @@ -372,22 +403,9 @@ encodeLoop: panic("invalid offset") } - // Extend the n-byte match as long as possible. - l := best.length - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - // Write our sequence var seq seq + l := best.length seq.litLen = uint32(s - nextEmit) seq.matchLen = uint32(l - zstdMinMatch) if seq.litLen > 0 { @@ -404,10 +422,8 @@ encodeLoop: break encodeLoop } - // Index match start+1 (long) -> s - 1 - index0 := s - l + 1 - // every entry - for index0 < s-1 { + // Index old s + 1 -> s - 1 + for index0 < s { cv0 := load6432(src, index0) h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen) @@ -416,50 +432,6 @@ encodeLoop: e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} index0++ } - - cv = load6432(src, s) - if !canRepeat { - continue - } - - // Check offset 2 - for { - o2 := s - offset2 - if load3232(src, o2) != uint32(cv) { - // Do regular search - break - } - - // Store this, since we have it. - nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) - nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) - - // We have at least 4 byte match. - // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) - - e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} - e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset} - seq.matchLen = uint32(l) - zstdMinMatch - seq.litLen = 0 - - // Since litlen is always 0, this is offset 1. - seq.offset = 1 - s += l - nextEmit = s - if debugSequences { - println("sequence", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - - // Swap offset 1 and 2. - offset1, offset2 = offset2, offset1 - if s >= sLimit { - // Finished - break encodeLoop - } - cv = load6432(src, s) - } } if int(nextEmit) < len(src) { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index 602c05ee..8582f31a 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } + e.table = [betterShortTableSize]tableEntry{} + e.longTable = [betterLongTableSize]prevEntry{} e.cur = e.maxMatchOff break } @@ -156,8 +152,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -416,15 +412,23 @@ encodeLoop: // Try to find a better match by searching for a long match at the end of the current best match if s+matched < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is around 3 bytes, but depends on input. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 3 + nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) - cv := load3232(src, s) + s2 := s + skipBeginning + cv := load3232(src, s2) candidateL := e.longTable[nextHashL] - coffsetL := candidateL.offset - e.cur - matched - if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + coffsetL := candidateL.offset - e.cur - matched + skipBeginning + if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4 if matchedNext > matched { t = coffsetL + s = s2 matched = matchedNext if debugMatches { println("long match at end-of-match") @@ -434,12 +438,13 @@ encodeLoop: // Check prev long... if true { - coffsetL = candidateL.prev - e.cur - matched - if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { + coffsetL = candidateL.prev - e.cur - matched + skipBeginning + if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) { // Found a long match, at least 4 bytes. - matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4 + matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4 if matchedNext > matched { t = coffsetL + s = s2 matched = matchedNext if debugMatches { println("prev long match at end-of-match") @@ -518,8 +523,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -578,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} @@ -674,8 +679,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -1047,8 +1052,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index d6b31042..a154c18f 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = tableEntry{} - } + e.table = [dFastShortTableSize]tableEntry{} + e.longTable = [dFastLongTableSize]tableEntry{} e.cur = e.maxMatchOff break } @@ -127,8 +123,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - if e.cur >= bufferReset { + if e.cur >= e.bufferReset { for i := range e.table[:] { e.table[i] = tableEntry{} } @@ -439,8 +435,8 @@ encodeLoop: var t int32 for { - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -685,7 +681,7 @@ encodeLoop: } // We do not store history, so we must offset e.cur to avoid false matches for next user. - if e.cur < bufferReset { + if e.cur < e.bufferReset { e.cur += int32(len(src)) } } @@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} @@ -785,8 +781,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -969,7 +965,7 @@ encodeLoop: te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen) - longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen) + longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen) e.longTable[longHash1] = te0 e.longTable[longHash2] = te1 e.markLongShardDirty(longHash1) @@ -1002,8 +998,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -1088,7 +1084,7 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { } } e.lastDictID = d.id - e.allDirty = true + allDirty = true } // Reset table to initial state e.cur = e.maxMatchOff @@ -1103,7 +1099,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { } if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { - copy(e.longTable[:], e.dictLongTable) + //copy(e.longTable[:], e.dictLongTable) + e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable) for i := range e.longTableShardDirty { e.longTableShardDirty[i] = false } @@ -1114,7 +1111,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { continue } - copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + // copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize]) + *(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:]) + e.longTableShardDirty[i] = false } } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index f2502629..f45a3da7 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -6,8 +6,6 @@ package zstd import ( "fmt" - "math" - "math/bits" ) const ( @@ -45,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} @@ -87,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { // TEMPLATE const hashLog = tableBits // seems global, but would be nice to tweak. - const kSearchStrength = 7 + const kSearchStrength = 6 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -135,21 +133,7 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - var length int32 - // length = 4 + e.matchlen(s+6, repIndex+4, src) - { - a := src[s+6:] - b := src[repIndex+4:] - endI := len(a) & (math.MaxInt32 - 7) - length = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } - + length := 4 + e.matchlen(s+6, repIndex+4, src) seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. @@ -236,20 +220,7 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - //l := e.matchlen(s+4, t+4, src) + 4 - var l int32 - { - a := src[s+4:] - b := src[t+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := e.matchlen(s+4, t+4, src) + 4 // Extend backwards tMin := s - e.maxMatchOff @@ -286,20 +257,7 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - //l := 4 + e.matchlen(s+4, o2+4, src) - var l int32 - { - a := src[s+4:] - b := src[o2+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := 4 + e.matchlen(s+4, o2+4, src) // Store this, since we have it. nextHash := hashLen(cv, hashLog, tableFastHashLen) @@ -345,13 +303,13 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) if debugEncoder { - if len(src) > maxBlockSize { + if len(src) > maxCompressedBlockSize { panic("src too big") } } // Protect against e.cur wraparound. - if e.cur >= bufferReset { + if e.cur >= e.bufferReset { for i := range e.table[:] { e.table[i] = tableEntry{} } @@ -375,7 +333,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { // TEMPLATE const hashLog = tableBits // seems global, but would be nice to tweak. - const kSearchStrength = 8 + const kSearchStrength = 6 // nextEmit is where in src the next emitLiteral should start from. nextEmit := s @@ -418,21 +376,7 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // length := 4 + e.matchlen(s+6, repIndex+4, src) - // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) - var length int32 - { - a := src[s+6:] - b := src[repIndex+4:] - endI := len(a) & (math.MaxInt32 - 7) - length = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + length := 4 + e.matchlen(s+6, repIndex+4, src) seq.matchLen = uint32(length - zstdMinMatch) @@ -522,21 +466,7 @@ encodeLoop: panic(fmt.Sprintf("t (%d) < 0 ", t)) } // Extend the 4-byte match as long as possible. - //l := e.matchlenNoHist(s+4, t+4, src) + 4 - // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 - var l int32 - { - a := src[s+4:] - b := src[t+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := e.matchlen(s+4, t+4, src) + 4 // Extend backwards tMin := s - e.maxMatchOff @@ -573,21 +503,7 @@ encodeLoop: if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) - var l int32 - { - a := src[s+4:] - b := src[o2+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := 4 + e.matchlen(s+4, o2+4, src) // Store this, since we have it. nextHash := hashLen(cv, hashLog, tableFastHashLen) @@ -621,7 +537,7 @@ encodeLoop: println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } // We do not store history, so we must offset e.cur to avoid false matches for next user. - if e.cur < bufferReset { + if e.cur < e.bufferReset { e.cur += int32(len(src)) } } @@ -638,11 +554,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { return } // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } + e.table = [tableSize]tableEntry{} e.cur = e.maxMatchOff break } @@ -730,20 +644,7 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - var length int32 - // length = 4 + e.matchlen(s+6, repIndex+4, src) - { - a := src[s+6:] - b := src[repIndex+4:] - endI := len(a) & (math.MaxInt32 - 7) - length = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + length := 4 + e.matchlen(s+6, repIndex+4, src) seq.matchLen = uint32(length - zstdMinMatch) @@ -831,20 +732,7 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - //l := e.matchlen(s+4, t+4, src) + 4 - var l int32 - { - a := src[s+4:] - b := src[t+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := e.matchlen(s+4, t+4, src) + 4 // Extend backwards tMin := s - e.maxMatchOff @@ -881,20 +769,7 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - //l := 4 + e.matchlen(s+4, o2+4, src) - var l int32 - { - a := src[s+4:] - b := src[o2+4:] - endI := len(a) & (math.MaxInt32 - 7) - l = int32(endI) + 4 - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 - break - } - } - } + l := 4 + e.matchlen(s+4, o2+4, src) // Store this, since we have it. nextHash := hashLen(cv, hashLog, tableFastHashLen) @@ -954,13 +829,12 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { } if true { end := e.maxMatchOff + int32(len(d.content)) - 8 - for i := e.maxMatchOff; i < end; i += 3 { + for i := e.maxMatchOff; i < end; i += 2 { const hashLog = tableBits cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5 - nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6 - nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7 + nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 6 + nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7 e.dictTable[nextHash] = tableEntry{ val: uint32(cv), offset: i, @@ -969,10 +843,6 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { val: uint32(cv >> 8), offset: i + 1, } - e.dictTable[nextHash2] = tableEntry{ - val: uint32(cv >> 16), - offset: i + 2, - } } } e.lastDictID = d.id @@ -992,7 +862,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { const shardCnt = tableShardCnt const shardSize = tableShardSize if e.allDirty || dirtyShardCnt > shardCnt*4/6 { - copy(e.table[:], e.dictTable) + //copy(e.table[:], e.dictTable) + e.table = *(*[tableSize]tableEntry)(e.dictTable) for i := range e.tableShardDirty { e.tableShardDirty[i] = false } @@ -1004,7 +875,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { continue } - copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + //copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize]) + *(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:]) e.tableShardDirty[i] = false } e.allDirty = false diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index e6e31596..4de0aed0 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -8,6 +8,7 @@ import ( "crypto/rand" "fmt" "io" + "math" rdebug "runtime/debug" "sync" @@ -98,23 +99,25 @@ func (e *Encoder) Reset(w io.Writer) { if cap(s.filling) == 0 { s.filling = make([]byte, 0, e.o.blockSize) } - if cap(s.current) == 0 { - s.current = make([]byte, 0, e.o.blockSize) - } - if cap(s.previous) == 0 { - s.previous = make([]byte, 0, e.o.blockSize) + if e.o.concurrent > 1 { + if cap(s.current) == 0 { + s.current = make([]byte, 0, e.o.blockSize) + } + if cap(s.previous) == 0 { + s.previous = make([]byte, 0, e.o.blockSize) + } + s.current = s.current[:0] + s.previous = s.previous[:0] + if s.writing == nil { + s.writing = &blockEnc{lowMem: e.o.lowMem} + s.writing.init() + } + s.writing.initNewEncode() } if s.encoder == nil { s.encoder = e.o.encoder() } - if s.writing == nil { - s.writing = &blockEnc{lowMem: e.o.lowMem} - s.writing.init() - } - s.writing.initNewEncode() s.filling = s.filling[:0] - s.current = s.current[:0] - s.previous = s.previous[:0] s.encoder.Reset(e.o.dict, false) s.headerWritten = false s.eofWritten = false @@ -258,6 +261,32 @@ func (e *Encoder) nextBlock(final bool) error { return s.err } + // SYNC: + if e.o.concurrent == 1 { + src := s.filling + s.nInput += int64(len(s.filling)) + if debugEncoder { + println("Adding sync block,", len(src), "bytes, final:", final) + } + enc := s.encoder + blk := enc.Block() + blk.reset(nil) + enc.Encode(blk, src) + blk.last = final + if final { + s.eofWritten = true + } + + s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + if s.err != nil { + return s.err + } + _, s.err = s.w.Write(blk.output) + s.nWritten += int64(len(blk.output)) + s.filling = s.filling[:0] + return s.err + } + // Move blocks forward. s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) @@ -300,22 +329,8 @@ func (e *Encoder) nextBlock(final bool) error { } s.wWg.Done() }() - err := errIncompressible - // If we got the exact same number of literals as input, - // assume the literals cannot be compressed. - if len(src) != len(blk.literals) || len(src) != e.o.blockSize { - err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - } - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - blk.encodeRaw(src) - // In fast mode, we do not transfer offsets, so we don't have to deal with changing the. - case nil: - default: - s.writeErr = err + s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + if s.writeErr != nil { return } _, s.writeErr = s.w.Write(blk.output) @@ -486,8 +501,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If a non-single block is needed the encoder will reset again. e.encoders <- enc }() - // Use single segments when above minimum window and below 1MB. - single := len(src) < 1<<20 && len(src) > MinWindowSize + // Use single segments when above minimum window and below window size. + single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { single = *e.o.single } @@ -509,7 +524,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } // If we can do everything in one block, prefer that. - if len(src) <= maxCompressedBlockSize { + if len(src) <= e.o.blockSize { enc.Reset(e.o.dict, true) // Slightly faster with no history and everything in one block. if e.o.crc { @@ -525,25 +540,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // If we got the exact same number of literals as input, // assume the literals cannot be compressed. - err := errIncompressible oldout := blk.output - if len(blk.literals) != len(src) || len(src) != e.o.blockSize { - // Output directly to dst - blk.output = dst - err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) - } + // Output directly to dst + blk.output = dst - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - dst = blk.encodeRawTo(dst, src) - case nil: - dst = blk.output - default: + err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy) + if err != nil { panic(err) } + dst = blk.output blk.output = oldout } else { enc.Reset(e.o.dict, false) @@ -562,25 +567,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(src) == 0 { blk.last = true } - err := errIncompressible - // If we got the exact same number of literals as input, - // assume the literals cannot be compressed. - if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { - err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) - } - - switch err { - case errIncompressible: - if debugEncoder { - println("Storing incompressible block as raw") - } - dst = blk.encodeRawTo(dst, todo) - blk.popOffsets() - case nil: - dst = append(dst, blk.output...) - default: + err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy) + if err != nil { panic(err) } + dst = append(dst, blk.output...) blk.reset(nil) } } @@ -597,3 +588,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } + +// MaxEncodedSize returns the expected maximum +// size of an encoded block or stream. +func (e *Encoder) MaxEncodedSize(size int) int { + frameHeader := 4 + 2 // magic + frame header & window descriptor + if e.o.dict != nil { + frameHeader += 4 + } + // Frame content size: + if size < 256 { + frameHeader++ + } else if size < 65536+256 { + frameHeader += 2 + } else if size < math.MaxInt32 { + frameHeader += 4 + } else { + frameHeader += 8 + } + // Final crc + if e.o.crc { + frameHeader += 4 + } + + // Max overhead is 3 bytes/block. + // There cannot be 0 blocks. + blocks := (size + e.o.blockSize) / e.o.blockSize + + // Combine, add padding. + maxSz := frameHeader + 3*blocks + size + if e.o.pad > 1 { + maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad)) + } + return maxSz +} diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 7d29e1d6..faaf8192 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -3,6 +3,8 @@ package zstd import ( "errors" "fmt" + "math" + "math/bits" "runtime" "strings" ) @@ -24,6 +26,7 @@ type encoderOptions struct { allLitEntropy bool customWindow bool customALEntropy bool + customBlockSize bool lowMem bool dict *dict } @@ -33,10 +36,10 @@ func (o *encoderOptions) setDefault() { concurrent: runtime.GOMAXPROCS(0), crc: true, single: nil, - blockSize: 1 << 16, + blockSize: maxCompressedBlockSize, windowSize: 8 << 20, level: SpeedDefault, - allLitEntropy: true, + allLitEntropy: false, lowMem: false, } } @@ -46,22 +49,22 @@ func (o encoderOptions) encoder() encoder { switch o.level { case SpeedFastest: if o.dict != nil { - return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} } - return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} case SpeedDefault: if o.dict != nil { - return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}} + return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}} } - return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} case SpeedBetterCompression: if o.dict != nil { - return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} } - return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} case SpeedBestCompression: - return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} } panic("unknown compression level") } @@ -75,6 +78,7 @@ func WithEncoderCRC(b bool) EOption { // WithEncoderConcurrency will set the concurrency, // meaning the maximum number of encoders to run concurrently. // The value supplied must be at least 1. +// For streams, setting a value of 1 will disable async compression. // By default this will be set to GOMAXPROCS. func WithEncoderConcurrency(n int) EOption { return func(o *encoderOptions) error { @@ -106,6 +110,7 @@ func WithWindowSize(n int) EOption { o.customWindow = true if o.blockSize > o.windowSize { o.blockSize = o.windowSize + o.customBlockSize = true } return nil } @@ -124,7 +129,7 @@ func WithEncoderPadding(n int) EOption { } // No need to waste our time. if n == 1 { - o.pad = 0 + n = 0 } if n > 1<<30 { return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ") @@ -188,10 +193,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel { return SpeedDefault case level >= 6 && level < 10: return SpeedBetterCompression - case level >= 10: + default: return SpeedBestCompression } - return SpeedDefault } // String provides a string representation of the compression level. @@ -222,6 +226,9 @@ func WithEncoderLevel(l EncoderLevel) EOption { switch o.level { case SpeedFastest: o.windowSize = 4 << 20 + if !o.customBlockSize { + o.blockSize = 1 << 16 + } case SpeedDefault: o.windowSize = 8 << 20 case SpeedBetterCompression: @@ -231,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption { } } if !o.customALEntropy { - o.allLitEntropy = l > SpeedFastest + o.allLitEntropy = l > SpeedDefault } return nil @@ -278,7 +285,7 @@ func WithNoEntropyCompression(b bool) EOption { // a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. // For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB. // This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations. -// If this is not specified, block encodes will automatically choose this based on the input size. +// If this is not specified, block encodes will automatically choose this based on the input size and the window size. // This setting has no effect on streamed encodes. func WithSingleSegment(b bool) EOption { return func(o *encoderOptions) error { @@ -299,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption { } // WithEncoderDict allows to register a dictionary that will be used for the encode. +// +// The slice dict must be in the [dictionary format] produced by +// "zstd --train" from the Zstandard reference implementation. +// // The encoder *may* choose to use no dictionary instead for certain payloads. +// +// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format func WithEncoderDict(dict []byte) EOption { return func(o *encoderOptions) error { d, err := loadDict(dict) @@ -310,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption { return nil } } + +// WithEncoderDictRaw registers a dictionary that may be used by the encoder. +// +// The slice content may contain arbitrary data. It will be used as an initial +// history. +func WithEncoderDictRaw(id uint32, content []byte) EOption { + return func(o *encoderOptions) error { + if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { + return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) + } + o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}} + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 989c79f8..53e160f7 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -5,26 +5,20 @@ package zstd import ( - "bytes" + "encoding/binary" "encoding/hex" "errors" - "hash" "io" - "sync" "github.com/klauspost/compress/zstd/internal/xxhash" ) type frameDec struct { - o decoderOptions - crc hash.Hash64 - offset int64 + o decoderOptions + crc *xxhash.Digest WindowSize uint64 - // In order queue of blocks being decoded. - decoding chan *blockDec - // Frame history passed between blocks history history @@ -34,15 +28,10 @@ type frameDec struct { bBuf byteBuf FrameContentSize uint64 - frameDone sync.WaitGroup - DictionaryID *uint32 + DictionaryID uint32 HasCheckSum bool SingleSegment bool - - // asyncRunning indicates whether the async routine processes input on 'decoding'. - asyncRunningMu sync.Mutex - asyncRunning bool } const ( @@ -54,9 +43,9 @@ const ( MaxWindowSize = 1 << 29 ) -var ( - frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd} - skippableFrameMagic = []byte{0x2a, 0x4d, 0x18} +const ( + frameMagic = "\x28\xb5\x2f\xfd" + skippableFrameMagic = "\x2a\x4d\x18" ) func newFrameDec(o decoderOptions) *frameDec { @@ -84,25 +73,25 @@ func (d *frameDec) reset(br byteBuffer) error { switch err { case io.EOF, io.ErrUnexpectedEOF: return io.EOF - default: - return err case nil: signature[0] = b[0] + default: + return err } // Read the rest, don't allow io.ErrUnexpectedEOF b, err = br.readSmall(3) switch err { case io.EOF: return io.EOF - default: - return err case nil: copy(signature[1:], b) + default: + return err } - if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 { + if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 { if debugDecoder { - println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic)) + println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic))) } // Break if not skippable frame. break @@ -117,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error { } n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) println("Skipping frame with", n, "bytes.") - err = br.skipN(int(n)) + err = br.skipN(int64(n)) if err != nil { if debugDecoder { println("Reading discarded frame", err) @@ -125,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error { return err } } - if !bytes.Equal(signature[:], frameMagic) { + if string(signature[:]) != frameMagic { if debugDecoder { - println("Got magic numbers: ", signature, "want:", frameMagic) + println("Got magic numbers: ", signature, "want:", []byte(frameMagic)) } return ErrMagicMismatch } @@ -166,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error { // Read Dictionary_ID // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - d.DictionaryID = nil + d.DictionaryID = 0 if size := fhd & 3; size != 0 { if size == 3 { size = 4 @@ -178,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error { return err } var id uint32 - switch size { + switch len(b) { case 1: id = uint32(b[0]) case 2: @@ -189,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error { if debugDecoder { println("Dict size", size, "ID:", id) } - if id > 0 { - // ID 0 means "sorry, no dictionary anyway". - // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format - d.DictionaryID = &id - } + d.DictionaryID = id } // Read Frame_Content_Size @@ -208,14 +193,14 @@ func (d *frameDec) reset(br byteBuffer) error { default: fcsSize = 1 << v } - d.FrameContentSize = 0 + d.FrameContentSize = fcsUnknown if fcsSize > 0 { b, err := br.readSmall(fcsSize) if err != nil { println("Reading Frame content", err) return err } - switch fcsSize { + switch len(b) { case 1: d.FrameContentSize = uint64(b[0]) case 2: @@ -229,9 +214,10 @@ func (d *frameDec) reset(br byteBuffer) error { d.FrameContentSize = uint64(d1) | (uint64(d2) << 32) } if debugDecoder { - println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize) + println("Read FCS:", d.FrameContentSize) } } + // Move this to shared. d.HasCheckSum = fhd&(1<<2) != 0 if d.HasCheckSum { @@ -241,20 +227,27 @@ func (d *frameDec) reset(br byteBuffer) error { d.crc.Reset() } + if d.WindowSize > d.o.maxWindowSize { + if debugDecoder { + printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + } + return ErrWindowSizeExceeded + } + if d.WindowSize == 0 && d.SingleSegment { // We may not need window in this case. d.WindowSize = d.FrameContentSize if d.WindowSize < MinWindowSize { d.WindowSize = MinWindowSize } - } - - if d.WindowSize > uint64(d.o.maxWindowSize) { - if debugDecoder { - printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + if d.WindowSize > d.o.maxDecodedSize { + if debugDecoder { + printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize) + } + return ErrDecoderSizeExceeded } - return ErrWindowSizeExceeded } + // The minimum Window_Size is 1 KB. if d.WindowSize < MinWindowSize { if debugDecoder { @@ -263,11 +256,23 @@ func (d *frameDec) reset(br byteBuffer) error { return ErrWindowSizeTooSmall } d.history.windowSize = int(d.WindowSize) - if d.o.lowMem && d.history.windowSize < maxBlockSize { - d.history.maxSize = d.history.windowSize * 2 + if !d.o.lowMem || d.history.windowSize < maxBlockSize { + // Alloc 2x window size if not low-mem, or window size below 2MB. + d.history.allocFrameBuffer = d.history.windowSize * 2 } else { - d.history.maxSize = d.history.windowSize + maxBlockSize + if d.o.lowMem { + // Alloc with 1MB extra. + d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2 + } else { + // Alloc with 2MB extra. + d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize + } } + + if debugDecoder { + println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum) + } + // history contains input - maybe we do something d.rawInput = br return nil @@ -276,209 +281,85 @@ func (d *frameDec) reset(br byteBuffer) error { // next will start decoding the next block from stream. func (d *frameDec) next(block *blockDec) error { if debugDecoder { - printf("decoding new block %p:%p", block, block.data) + println("decoding new block") } err := block.reset(d.rawInput, d.WindowSize) if err != nil { println("block error:", err) // Signal the frame decoder we have a problem. - d.sendErr(block, err) + block.sendErr(err) return err } - block.input <- struct{}{} - if debugDecoder { - println("next block:", block) - } - d.asyncRunningMu.Lock() - defer d.asyncRunningMu.Unlock() - if !d.asyncRunning { - return nil - } - if block.Last { - // We indicate the frame is done by sending io.EOF - d.decoding <- block - return io.EOF - } - d.decoding <- block return nil } -// sendEOF will queue an error block on the frame. -// This will cause the frame decoder to return when it encounters the block. -// Returns true if the decoder was added. -func (d *frameDec) sendErr(block *blockDec, err error) bool { - d.asyncRunningMu.Lock() - defer d.asyncRunningMu.Unlock() - if !d.asyncRunning { - return false - } - - println("sending error", err.Error()) - block.sendErr(err) - d.decoding <- block - return true -} - -// checkCRC will check the checksum if the frame has one. +// checkCRC will check the checksum, assuming the frame has one. // Will return ErrCRCMismatch if crc check failed, otherwise nil. func (d *frameDec) checkCRC() error { - if !d.HasCheckSum { - return nil - } - var tmp [4]byte - got := d.crc.Sum64() - // Flip to match file order. - tmp[0] = byte(got >> 0) - tmp[1] = byte(got >> 8) - tmp[2] = byte(got >> 16) - tmp[3] = byte(got >> 24) - // We can overwrite upper tmp now - want, err := d.rawInput.readSmall(4) + buf, err := d.rawInput.readSmall(4) if err != nil { println("CRC missing?", err) return err } - if !bytes.Equal(tmp[:], want) { + want := binary.LittleEndian.Uint32(buf[:4]) + got := uint32(d.crc.Sum64()) + + if got != want { if debugDecoder { - println("CRC Check Failed:", tmp[:], "!=", want) + printf("CRC check failed: got %08x, want %08x\n", got, want) } return ErrCRCMismatch } if debugDecoder { - println("CRC ok", tmp[:]) + printf("CRC ok %08x\n", got) } return nil } -func (d *frameDec) initAsync() { - if !d.o.lowMem && !d.SingleSegment { - // set max extra size history to 2MB. - d.history.maxSize = d.history.windowSize + maxBlockSize - } - // re-alloc if more than one extra block size. - if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize { - d.history.b = make([]byte, 0, d.history.maxSize) - } - if cap(d.history.b) < d.history.maxSize { - d.history.b = make([]byte, 0, d.history.maxSize) - } - if cap(d.decoding) < d.o.concurrent { - d.decoding = make(chan *blockDec, d.o.concurrent) - } - if debugDecoder { - h := d.history - printf("history init. len: %d, cap: %d", len(h.b), cap(h.b)) - } - d.asyncRunningMu.Lock() - d.asyncRunning = true - d.asyncRunningMu.Unlock() -} - -// startDecoder will start decoding blocks and write them to the writer. -// The decoder will stop as soon as an error occurs or at end of frame. -// When the frame has finished decoding the *bufio.Reader -// containing the remaining input will be sent on frameDec.frameDone. -func (d *frameDec) startDecoder(output chan decodeOutput) { - written := int64(0) - - defer func() { - d.asyncRunningMu.Lock() - d.asyncRunning = false - d.asyncRunningMu.Unlock() - - // Drain the currently decoding. - d.history.error = true - flushdone: - for { - select { - case b := <-d.decoding: - b.history <- &d.history - output <- <-b.result - default: - break flushdone - } - } - println("frame decoder done, signalling done") - d.frameDone.Done() - }() - // Get decoder for first block. - block := <-d.decoding - block.history <- &d.history - for { - var next *blockDec - // Get result - r := <-block.result - if r.err != nil { - println("Result contained error", r.err) - output <- r - return - } - if debugDecoder { - println("got result, from ", d.offset, "to", d.offset+int64(len(r.b))) - d.offset += int64(len(r.b)) - } - if !block.Last { - // Send history to next block - select { - case next = <-d.decoding: - if debugDecoder { - println("Sending ", len(d.history.b), "bytes as history") - } - next.history <- &d.history - default: - // Wait until we have sent the block, so - // other decoders can potentially get the decoder. - next = nil - } - } - - // Add checksum, async to decoding. - if d.HasCheckSum { - n, err := d.crc.Write(r.b) - if err != nil { - r.err = err - if n != len(r.b) { - r.err = io.ErrShortWrite - } - output <- r - return - } - } - written += int64(len(r.b)) - if d.SingleSegment && uint64(written) > d.FrameContentSize { - println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize) - r.err = ErrFrameSizeExceeded - output <- r - return - } - if block.Last { - r.err = d.checkCRC() - output <- r - return - } - output <- r - if next == nil { - // There was no decoder available, we wait for one now that we have sent to the writer. - if debugDecoder { - println("Sending ", len(d.history.b), " bytes as history") - } - next = <-d.decoding - next.history <- &d.history - } - block = next +// consumeCRC skips over the checksum, assuming the frame has one. +func (d *frameDec) consumeCRC() error { + _, err := d.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) } + return err } -// runDecoder will create a sync decoder that will decode a block of data. +// runDecoder will run the decoder for the remainder of the frame. func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { saved := d.history.b // We use the history for output to avoid copying it. d.history.b = dst + d.history.ignoreBuffer = len(dst) // Store input length, so we only check new data. crcStart := len(dst) + d.history.decoders.maxSyncLen = 0 + if d.o.limitToCap { + d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst)) + } + if d.FrameContentSize != fcsUnknown { + if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen { + d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst)) + } + if d.history.decoders.maxSyncLen > d.o.maxDecodedSize { + if debugDecoder { + println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize) + } + return dst, ErrDecoderSizeExceeded + } + if debugDecoder { + println("maxSyncLen:", d.history.decoders.maxSyncLen) + } + if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen { + // Alloc for output + dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc) + copy(dst2, dst) + dst = dst2 + } + } var err error for { err = dec.reset(d.rawInput, d.WindowSize) @@ -489,30 +370,41 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { println("next block:", dec) } err = dec.decodeBuf(&d.history) - if err != nil || dec.Last { + if err != nil { + break + } + if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize { + println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize) + err = ErrDecoderSizeExceeded break } - if uint64(len(d.history.b)) > d.o.maxDecodedSize { + if d.o.limitToCap && len(d.history.b) > cap(dst) { + println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst)) err = ErrDecoderSizeExceeded break } - if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize { - println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize) + if uint64(len(d.history.b)-crcStart) > d.FrameContentSize { + println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize) err = ErrFrameSizeExceeded break } + if dec.Last { + break + } + if debugDecoder { + println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize) + } } dst = d.history.b if err == nil { - if d.HasCheckSum { - var n int - n, err = d.crc.Write(dst[crcStart:]) - if err == nil { - if n != len(dst)-crcStart { - err = io.ErrShortWrite - } else { - err = d.checkCRC() - } + if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize { + err = ErrFrameSizeMismatch + } else if d.HasCheckSum { + if d.o.ignoreChecksum { + err = d.consumeCRC() + } else { + d.crc.Write(dst[crcStart:]) + err = d.checkCRC() } } } diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index e6d3d49b..2f8860a7 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -5,8 +5,10 @@ package zstd import ( + "encoding/binary" "errors" "fmt" + "io" ) const ( @@ -178,10 +180,32 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<> 3) - // println(s.norm[:s.symbolLen], s.symbolLen) return s.buildDtable() } +func (s *fseDecoder) mustReadFrom(r io.Reader) { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + // dt [maxTablesize]decSymbol // Decompression table. + // symbolLen uint16 // Length of active part of the symbol table. + // actualTableLog uint8 // Selected tablelog. + // maxBits uint8 // Maximum number of additional bits + // // used for table creation to avoid allocations. + // stateTable [256]uint16 + // norm [maxSymbolValue + 1]int16 + // preDefined bool + fatalErr(binary.Read(r, binary.LittleEndian, &s.dt)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.norm)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined)) +} + // decSymbol contains information about a state entry, // Including the state offset base, the output symbol and // the number of bits to read for the low part of the destination state. @@ -204,18 +228,10 @@ func (d decSymbol) newState() uint16 { return uint16(d >> 16) } -func (d decSymbol) baseline() uint32 { - return uint32(d >> 32) -} - func (d decSymbol) baselineInt() int { return int(d >> 32) } -func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) { - *d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32) -} - func (d *decSymbol) setNBits(nBits uint8) { const mask = 0xffffffffffffff00 *d = (*d & mask) | decSymbol(nBits) @@ -231,11 +247,6 @@ func (d *decSymbol) setNewState(state uint16) { *d = (*d & mask) | decSymbol(state)<<16 } -func (d *decSymbol) setBaseline(baseline uint32) { - const mask = 0xffffffff - *d = (*d & mask) | decSymbol(baseline)<<32 -} - func (d *decSymbol) setExt(addBits uint8, baseline uint32) { const mask = 0xffff00ff *d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32) @@ -257,68 +268,6 @@ func (s *fseDecoder) setRLE(symbol decSymbol) { s.dt[0] = symbol } -// buildDtable will build the decoding table. -func (s *fseDecoder) buildDtable() error { - tableSize := uint32(1 << s.actualTableLog) - highThreshold := tableSize - 1 - symbolNext := s.stateTable[:256] - - // Init, lay down lowprob symbols - { - for i, v := range s.norm[:s.symbolLen] { - if v == -1 { - s.dt[highThreshold].setAddBits(uint8(i)) - highThreshold-- - symbolNext[i] = 1 - } else { - symbolNext[i] = uint16(v) - } - } - } - // Spread symbols - { - tableMask := tableSize - 1 - step := tableStep(tableSize) - position := uint32(0) - for ss, v := range s.norm[:s.symbolLen] { - for i := 0; i < int(v); i++ { - s.dt[position].setAddBits(uint8(ss)) - position = (position + step) & tableMask - for position > highThreshold { - // lowprob area - position = (position + step) & tableMask - } - } - } - if position != 0 { - // position must reach all cells once, otherwise normalizedCounter is incorrect - return errors.New("corrupted input (position != 0)") - } - } - - // Build Decoding table - { - tableSize := uint16(1 << s.actualTableLog) - for u, v := range s.dt[:tableSize] { - symbol := v.addBits() - nextState := symbolNext[symbol] - symbolNext[symbol] = nextState + 1 - nBits := s.actualTableLog - byte(highBits(uint32(nextState))) - s.dt[u&maxTableMask].setNBits(nBits) - newState := (nextState << nBits) - tableSize - if newState > tableSize { - return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) - } - if newState == uint16(u) && nBits == 0 { - // Seems weird that this is possible with nbits > 0. - return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) - } - s.dt[u&maxTableMask].setNewState(newState) - } - } - return nil -} - // transform will transform the decoder table into a table usable for // decoding without having to apply the transformation while decoding. // The state will contain the base value and the number of bits to read. @@ -352,34 +301,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) { s.state = dt[br.getBits(tableLog)] } -// next returns the current symbol and sets the next state. -// At least tablelog bits must be available in the bit reader. -func (s *fseState) next(br *bitReader) { - lowBits := uint16(br.getBits(s.state.nbBits())) - s.state = s.dt[s.state.newState()+lowBits] -} - -// finished returns true if all bits have been read from the bitstream -// and the next state would require reading bits from the input. -func (s *fseState) finished(br *bitReader) bool { - return br.finished() && s.state.nbBits() > 0 -} - -// final returns the current state symbol without decoding the next. -func (s *fseState) final() (int, uint8) { - return s.state.baselineInt(), s.state.addBits() -} - // final returns the current state symbol without decoding the next. func (s decSymbol) final() (int, uint8) { return s.baselineInt(), s.addBits() } - -// nextFast returns the next symbol and sets the next state. -// This can only be used if no symbols are 0 bits. -// At least tablelog bits must be available in the bit reader. -func (s *fseState) nextFast(br *bitReader) (uint32, uint8) { - lowBits := uint16(br.getBitsFast(s.state.nbBits())) - s.state = s.dt[s.state.newState()+lowBits] - return s.state.baseline(), s.state.addBits() -} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go new file mode 100644 index 00000000..d04a829b --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go @@ -0,0 +1,65 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package zstd + +import ( + "fmt" +) + +type buildDtableAsmContext struct { + // inputs + stateTable *uint16 + norm *int16 + dt *uint64 + + // outputs --- set by the procedure in the case of error; + // for interpretation please see the error handling part below + errParam1 uint64 + errParam2 uint64 +} + +// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable. +// Function returns non-zero exit code on error. +// +//go:noescape +func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int + +// please keep in sync with _generate/gen_fse.go +const ( + errorCorruptedNormalizedCounter = 1 + errorNewStateTooBig = 2 + errorNewStateNoBits = 3 +) + +// buildDtable will build the decoding table. +func (s *fseDecoder) buildDtable() error { + ctx := buildDtableAsmContext{ + stateTable: &s.stateTable[0], + norm: &s.norm[0], + dt: (*uint64)(&s.dt[0]), + } + code := buildDtable_asm(s, &ctx) + + if code != 0 { + switch code { + case errorCorruptedNormalizedCounter: + position := ctx.errParam1 + return fmt.Errorf("corrupted input (position=%d, expected 0)", position) + + case errorNewStateTooBig: + newState := decSymbol(ctx.errParam1) + size := ctx.errParam2 + return fmt.Errorf("newState (%d) outside table size (%d)", newState, size) + + case errorNewStateNoBits: + newState := decSymbol(ctx.errParam1) + oldState := decSymbol(ctx.errParam2) + return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, oldState) + + default: + return fmt.Errorf("buildDtable_asm returned unhandled nonzero code = %d", code) + } + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s new file mode 100644 index 00000000..bcde3986 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s @@ -0,0 +1,126 @@ +// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !noasm + +// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int +TEXT ·buildDtable_asm(SB), $0-24 + MOVQ ctx+8(FP), CX + MOVQ s+0(FP), DI + + // Load values + MOVBQZX 4098(DI), DX + XORQ AX, AX + BTSQ DX, AX + MOVQ (CX), BX + MOVQ 16(CX), SI + LEAQ -1(AX), R8 + MOVQ 8(CX), CX + MOVWQZX 4096(DI), DI + + // End load values + // Init, lay down lowprob symbols + XORQ R9, R9 + JMP init_main_loop_condition + +init_main_loop: + MOVWQSX (CX)(R9*2), R10 + CMPW R10, $-1 + JNE do_not_update_high_threshold + MOVB R9, 1(SI)(R8*8) + DECQ R8 + MOVQ $0x0000000000000001, R10 + +do_not_update_high_threshold: + MOVW R10, (BX)(R9*2) + INCQ R9 + +init_main_loop_condition: + CMPQ R9, DI + JL init_main_loop + + // Spread symbols + // Calculate table step + MOVQ AX, R9 + SHRQ $0x01, R9 + MOVQ AX, R10 + SHRQ $0x03, R10 + LEAQ 3(R9)(R10*1), R9 + + // Fill add bits values + LEAQ -1(AX), R10 + XORQ R11, R11 + XORQ R12, R12 + JMP spread_main_loop_condition + +spread_main_loop: + XORQ R13, R13 + MOVWQSX (CX)(R12*2), R14 + JMP spread_inner_loop_condition + +spread_inner_loop: + MOVB R12, 1(SI)(R11*8) + +adjust_position: + ADDQ R9, R11 + ANDQ R10, R11 + CMPQ R11, R8 + JG adjust_position + INCQ R13 + +spread_inner_loop_condition: + CMPQ R13, R14 + JL spread_inner_loop + INCQ R12 + +spread_main_loop_condition: + CMPQ R12, DI + JL spread_main_loop + TESTQ R11, R11 + JZ spread_check_ok + MOVQ ctx+8(FP), AX + MOVQ R11, 24(AX) + MOVQ $+1, ret+16(FP) + RET + +spread_check_ok: + // Build Decoding table + XORQ DI, DI + +build_table_main_table: + MOVBQZX 1(SI)(DI*8), CX + MOVWQZX (BX)(CX*2), R8 + LEAQ 1(R8), R9 + MOVW R9, (BX)(CX*2) + MOVQ R8, R9 + BSRQ R9, R9 + MOVQ DX, CX + SUBQ R9, CX + SHLQ CL, R8 + SUBQ AX, R8 + MOVB CL, (SI)(DI*8) + MOVW R8, 2(SI)(DI*8) + CMPQ R8, AX + JLE build_table_check1_ok + MOVQ ctx+8(FP), CX + MOVQ R8, 24(CX) + MOVQ AX, 32(CX) + MOVQ $+2, ret+16(FP) + RET + +build_table_check1_ok: + TESTB CL, CL + JNZ build_table_check2_ok + CMPW R8, DI + JNE build_table_check2_ok + MOVQ ctx+8(FP), AX + MOVQ R8, 24(AX) + MOVQ DI, 32(AX) + MOVQ $+3, ret+16(FP) + RET + +build_table_check2_ok: + INCQ DI + CMPQ DI, AX + JL build_table_main_table + MOVQ $+0, ret+16(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go new file mode 100644 index 00000000..332e51fe --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go @@ -0,0 +1,72 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +package zstd + +import ( + "errors" + "fmt" +) + +// buildDtable will build the decoding table. +func (s *fseDecoder) buildDtable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + symbolNext := s.stateTable[:256] + + // Init, lay down lowprob symbols + { + for i, v := range s.norm[:s.symbolLen] { + if v == -1 { + s.dt[highThreshold].setAddBits(uint8(i)) + highThreshold-- + symbolNext[i] = 1 + } else { + symbolNext[i] = uint16(v) + } + } + } + + // Spread symbols + { + tableMask := tableSize - 1 + step := tableStep(tableSize) + position := uint32(0) + for ss, v := range s.norm[:s.symbolLen] { + for i := 0; i < int(v); i++ { + s.dt[position].setAddBits(uint8(ss)) + position = (position + step) & tableMask + for position > highThreshold { + // lowprob area + position = (position + step) & tableMask + } + } + } + if position != 0 { + // position must reach all cells once, otherwise normalizedCounter is incorrect + return errors.New("corrupted input (position != 0)") + } + } + + // Build Decoding table + { + tableSize := uint16(1 << s.actualTableLog) + for u, v := range s.dt[:tableSize] { + symbol := v.addBits() + nextState := symbolNext[symbol] + symbolNext[symbol] = nextState + 1 + nBits := s.actualTableLog - byte(highBits(uint32(nextState))) + s.dt[u&maxTableMask].setNBits(nBits) + newState := (nextState << nBits) - tableSize + if newState > tableSize { + return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) + } + if newState == uint16(u) && nBits == 0 { + // Seems weird that this is possible with nbits > 0. + return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) + } + s.dt[u&maxTableMask].setNewState(newState) + } + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index b4757ee3..ab26326a 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -62,9 +62,8 @@ func (s symbolTransform) String() string { // To indicate that you have populated the histogram call HistogramFinished // with the value of the highest populated symbol, as well as the number of entries // in the most populated entry. These are accepted at face value. -// The returned slice will always be length 256. -func (s *fseEncoder) Histogram() []uint32 { - return s.count[:] +func (s *fseEncoder) Histogram() *[256]uint32 { + return &s.count } // HistogramFinished can be called to indicate that the histogram has been populated. @@ -77,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) { s.clearCount = maxCount != 0 } -// prepare will prepare and allocate scratch tables used for both compression and decompression. -func (s *fseEncoder) prepare() (*fseEncoder, error) { - if s == nil { - s = &fseEncoder{} - } - s.useRLE = false - if s.clearCount && s.maxCount == 0 { - for i := range s.count { - s.count[i] = 0 - } - s.clearCount = false - } - return s, nil -} - // allocCtable will allocate tables needed for compression. // If existing tables a re big enough, they are simply re-used. func (s *fseEncoder) allocCtable() { @@ -710,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { c.state = c.stateTable[lu] } -// encode the output symbol provided and write it to the bitstream. -func (c *cState) encode(symbolTT symbolTransform) { - nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 - dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState) - c.bw.addBits16NC(c.state, uint8(nbBitsOut)) - c.state = c.stateTable[dstState] -} - // flush will write the tablelog to the output and flush the remaining full bytes. func (c *cState) flush(tableLog uint8) { c.bw.flush32() diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go index cf33f29a..5d73c21e 100644 --- a/vendor/github.com/klauspost/compress/zstd/hash.go +++ b/vendor/github.com/klauspost/compress/zstd/hash.go @@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 { return (uint32(u) * prime4bytes) >> (32 - length) } } - -// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash3(u uint32, h uint8) uint32 { - return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31) -} diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go index f783e32d..09164856 100644 --- a/vendor/github.com/klauspost/compress/zstd/history.go +++ b/vendor/github.com/klauspost/compress/zstd/history.go @@ -10,40 +10,48 @@ import ( // history contains the information transferred between blocks. type history struct { - b []byte - huffTree *huff0.Scratch - recentOffsets [3]int + // Literal decompression + huffTree *huff0.Scratch + + // Sequence decompression decoders sequenceDecs - windowSize int - maxSize int - error bool - dict *dict + recentOffsets [3]int + + // History buffer... + b []byte + + // ignoreBuffer is meant to ignore a number of bytes + // when checking for matches in history + ignoreBuffer int + + windowSize int + allocFrameBuffer int // needed? + error bool + dict *dict } // reset will reset the history to initial state of a frame. // The history must already have been initialized to the desired size. func (h *history) reset() { h.b = h.b[:0] + h.ignoreBuffer = 0 h.error = false h.recentOffsets = [3]int{1, 4, 8} - if f := h.decoders.litLengths.fse; f != nil && !f.preDefined { - fseDecoderPool.Put(f) - } - if f := h.decoders.offsets.fse; f != nil && !f.preDefined { - fseDecoderPool.Put(f) - } - if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined { - fseDecoderPool.Put(f) - } - h.decoders = sequenceDecs{} + h.decoders.freeDecoders() + h.decoders = sequenceDecs{br: h.decoders.br} + h.freeHuffDecoder() + h.huffTree = nil + h.dict = nil + //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) +} + +func (h *history) freeHuffDecoder() { if h.huffTree != nil { if h.dict == nil || h.dict.litEnc != h.huffTree { huffDecoderPool.Put(h.huffTree) + h.huffTree = nil } } - h.huffTree = nil - h.dict = nil - //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) } func (h *history) setDict(dict *dict) { @@ -54,6 +62,7 @@ func (h *history) setDict(dict *dict) { h.decoders.litLengths = dict.llDec h.decoders.offsets = dict.ofDec h.decoders.matchLengths = dict.mlDec + h.decoders.dict = dict.content h.recentOffsets = dict.offsets h.huffTree = dict.litEnc } @@ -83,6 +92,24 @@ func (h *history) append(b []byte) { copy(h.b[h.windowSize-len(b):], b) } +// ensureBlock will ensure there is space for at least one block... +func (h *history) ensureBlock() { + if cap(h.b) < h.allocFrameBuffer { + h.b = make([]byte, 0, h.allocFrameBuffer) + return + } + + avail := cap(h.b) - len(h.b) + if avail >= h.windowSize || avail > maxCompressedBlockSize { + return + } + // Move data down so we only have window size left. + // We know we have less than window size in b at this point. + discard := len(h.b) - h.windowSize + copy(h.b, h.b[discard:]) + h.b = h.b[:h.windowSize] +} + // append bytes to history without ever discarding anything. func (h *history) appendKeep(b []byte) { h.b = append(h.b, b...) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md index 69aa3bb5..777290d4 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md @@ -2,12 +2,7 @@ VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package. - -[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) -[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash) - -xxhash is a Go implementation of the 64-bit -[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a +xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a high-quality hashing algorithm that is much faster than anything in the Go standard library. @@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error) func (*Digest) Sum64() uint64 ``` -This implementation provides a fast pure-Go implementation and an even faster -assembly implementation for amd64. +The package is written with optimized pure Go and also contains even faster +assembly implementations for amd64 and arm64. If desired, the `purego` build tag +opts into using the Go code even on those architectures. + +[xxHash]: http://cyan4973.github.io/xxHash/ + +## Compatibility + +This package is in a module and the latest code is in version 2 of the module. +You need a version of Go with at least "minimal module compatibility" to use +github.com/cespare/xxhash/v2: + +* 1.9.7+ for Go 1.9 +* 1.10.3+ for Go 1.10 +* Go 1.11 or later + +I recommend using the latest release of Go. ## Benchmarks Here are some quick benchmarks comparing the pure-Go and assembly implementations of Sum64. -| input size | purego | asm | -| --- | --- | --- | -| 5 B | 979.66 MB/s | 1291.17 MB/s | -| 100 B | 7475.26 MB/s | 7973.40 MB/s | -| 4 KB | 17573.46 MB/s | 17602.65 MB/s | -| 10 MB | 17131.46 MB/s | 17142.16 MB/s | +| input size | purego | asm | +| ---------- | --------- | --------- | +| 4 B | 1.3 GB/s | 1.2 GB/s | +| 16 B | 2.9 GB/s | 3.5 GB/s | +| 100 B | 6.9 GB/s | 8.1 GB/s | +| 4 KB | 11.7 GB/s | 16.7 GB/s | +| 10 MB | 12.0 GB/s | 17.3 GB/s | -These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using -the following commands under Go 1.11.2: +These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C +CPU using the following commands under Go 1.19.2: ``` -$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes' -$ go test -benchtime 10s -bench '/xxhash,direct,bytes' +benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') +benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') ``` ## Projects using this package - [InfluxDB](https://github.com/influxdata/influxdb) - [Prometheus](https://github.com/prometheus/prometheus) +- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) - [FreeCache](https://github.com/coocood/freecache) +- [FastCache](https://github.com/VictoriaMetrics/fastcache) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go index 2c112a0a..fc40c820 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go @@ -18,19 +18,11 @@ const ( prime5 uint64 = 2870177450012600261 ) -// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where -// possible in the Go code is worth a small (but measurable) performance boost -// by avoiding some MOVQs. Vars are needed for the asm and also are useful for -// convenience in the Go code in a few places where we need to intentionally -// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the -// result overflows a uint64). -var ( - prime1v = prime1 - prime2v = prime2 - prime3v = prime3 - prime4v = prime4 - prime5v = prime5 -) +// Store the primes in an array as well. +// +// The consts are used when possible in Go code to avoid MOVs but we need a +// contiguous array of the assembly code. +var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} // Digest implements hash.Hash64. type Digest struct { @@ -52,10 +44,10 @@ func New() *Digest { // Reset clears the Digest's state so that it can be reused. func (d *Digest) Reset() { - d.v1 = prime1v + prime2 + d.v1 = primes[0] + prime2 d.v2 = prime2 d.v3 = 0 - d.v4 = -prime1v + d.v4 = -primes[0] d.total = 0 d.n = 0 } @@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) { n = len(b) d.total += uint64(n) + memleft := d.mem[d.n&(len(d.mem)-1):] + if d.n+n < 32 { // This new data doesn't even fill the current block. - copy(d.mem[d.n:], b) + copy(memleft, b) d.n += n return } if d.n > 0 { // Finish off the partial block. - copy(d.mem[d.n:], b) + c := copy(memleft, b) d.v1 = round(d.v1, u64(d.mem[0:8])) d.v2 = round(d.v2, u64(d.mem[8:16])) d.v3 = round(d.v3, u64(d.mem[16:24])) d.v4 = round(d.v4, u64(d.mem[24:32])) - b = b[32-d.n:] + b = b[c:] d.n = 0 } @@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 { h += d.total - i, end := 0, d.n - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(d.mem[i:i+8])) + b := d.mem[:d.n&(len(d.mem)-1)] + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } - if i+4 <= end { - h ^= uint64(u32(d.mem[i:i+4])) * prime1 + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 - i += 4 + b = b[4:] } - for i < end { - h ^= uint64(d.mem[i]) * prime5 + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 - i++ } h ^= h >> 33 diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go deleted file mode 100644 index 0ae847f7..00000000 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go +++ /dev/null @@ -1,12 +0,0 @@ -//go:build !appengine && gc && !purego -// +build !appengine,gc,!purego - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. -// -//go:noescape -func Sum64(b []byte) uint64 - -//go:noescape -func writeBlocks(d *Digest, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s index be8db5bf..ddb63aa9 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -1,215 +1,210 @@ +//go:build !appengine && gc && !purego && !noasm // +build !appengine // +build gc // +build !purego +// +build !noasm #include "textflag.h" -// Register allocation: -// AX h -// SI pointer to advance through b -// DX n -// BX loop end -// R8 v1, k1 -// R9 v2 -// R10 v3 -// R11 v4 -// R12 tmp -// R13 prime1v -// R14 prime2v -// DI prime4v - -// round reads from and advances the buffer pointer in SI. -// It assumes that R13 has prime1v and R14 has prime2v. -#define round(r) \ - MOVQ (SI), R12 \ - ADDQ $8, SI \ - IMULQ R14, R12 \ - ADDQ R12, r \ - ROLQ $31, r \ - IMULQ R13, r - -// mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. -#define mergeRound(acc, val) \ - IMULQ R14, val \ - ROLQ $31, val \ - IMULQ R13, val \ - XORQ val, acc \ - IMULQ R13, acc \ - ADDQ DI, acc +// Registers: +#define h AX +#define d AX +#define p SI // pointer to advance through b +#define n DX +#define end BX // loop end +#define v1 R8 +#define v2 R9 +#define v3 R10 +#define v4 R11 +#define x R12 +#define prime1 R13 +#define prime2 R14 +#define prime4 DI + +#define round(acc, x) \ + IMULQ prime2, x \ + ADDQ x, acc \ + ROLQ $31, acc \ + IMULQ prime1, acc + +// round0 performs the operation x = round(0, x). +#define round0(x) \ + IMULQ prime2, x \ + ROLQ $31, x \ + IMULQ prime1, x + +// mergeRound applies a merge round on the two registers acc and x. +// It assumes that prime1, prime2, and prime4 have been loaded. +#define mergeRound(acc, x) \ + round0(x) \ + XORQ x, acc \ + IMULQ prime1, acc \ + ADDQ prime4, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that there is at least one block +// to process. +#define blockLoop() \ +loop: \ + MOVQ +0(p), x \ + round(v1, x) \ + MOVQ +8(p), x \ + round(v2, x) \ + MOVQ +16(p), x \ + round(v3, x) \ + MOVQ +24(p), x \ + round(v4, x) \ + ADDQ $32, p \ + CMPQ p, end \ + JLE loop // func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT, $0-32 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 // Load fixed primes. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), DI + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 + MOVQ ·primes+24(SB), prime4 // Load slice. - MOVQ b_base+0(FP), SI - MOVQ b_len+8(FP), DX - LEAQ (SI)(DX*1), BX + MOVQ b_base+0(FP), p + MOVQ b_len+8(FP), n + LEAQ (p)(n*1), end // The first loop limit will be len(b)-32. - SUBQ $32, BX + SUBQ $32, end // Check whether we have at least one block. - CMPQ DX, $32 + CMPQ n, $32 JLT noBlocks // Set up initial state (v1, v2, v3, v4). - MOVQ R13, R8 - ADDQ R14, R8 - MOVQ R14, R9 - XORQ R10, R10 - XORQ R11, R11 - SUBQ R13, R11 - - // Loop until SI > BX. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ SI, BX - JLE blockLoop - - MOVQ R8, AX - ROLQ $1, AX - MOVQ R9, R12 - ROLQ $7, R12 - ADDQ R12, AX - MOVQ R10, R12 - ROLQ $12, R12 - ADDQ R12, AX - MOVQ R11, R12 - ROLQ $18, R12 - ADDQ R12, AX - - mergeRound(AX, R8) - mergeRound(AX, R9) - mergeRound(AX, R10) - mergeRound(AX, R11) + MOVQ prime1, v1 + ADDQ prime2, v1 + MOVQ prime2, v2 + XORQ v3, v3 + XORQ v4, v4 + SUBQ prime1, v4 + + blockLoop() + + MOVQ v1, h + ROLQ $1, h + MOVQ v2, x + ROLQ $7, x + ADDQ x, h + MOVQ v3, x + ROLQ $12, x + ADDQ x, h + MOVQ v4, x + ROLQ $18, x + ADDQ x, h + + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) JMP afterBlocks noBlocks: - MOVQ ·prime5v(SB), AX + MOVQ ·primes+32(SB), h afterBlocks: - ADDQ DX, AX - - // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. - ADDQ $24, BX - - CMPQ SI, BX - JG fourByte - -wordLoop: - // Calculate k1. - MOVQ (SI), R8 - ADDQ $8, SI - IMULQ R14, R8 - ROLQ $31, R8 - IMULQ R13, R8 - - XORQ R8, AX - ROLQ $27, AX - IMULQ R13, AX - ADDQ DI, AX - - CMPQ SI, BX - JLE wordLoop - -fourByte: - ADDQ $4, BX - CMPQ SI, BX - JG singles - - MOVL (SI), R8 - ADDQ $4, SI - IMULQ R13, R8 - XORQ R8, AX - - ROLQ $23, AX - IMULQ R14, AX - ADDQ ·prime3v(SB), AX - -singles: - ADDQ $4, BX - CMPQ SI, BX + ADDQ n, h + + ADDQ $24, end + CMPQ p, end + JG try4 + +loop8: + MOVQ (p), x + ADDQ $8, p + round0(x) + XORQ x, h + ROLQ $27, h + IMULQ prime1, h + ADDQ prime4, h + + CMPQ p, end + JLE loop8 + +try4: + ADDQ $4, end + CMPQ p, end + JG try1 + + MOVL (p), x + ADDQ $4, p + IMULQ prime1, x + XORQ x, h + + ROLQ $23, h + IMULQ prime2, h + ADDQ ·primes+16(SB), h + +try1: + ADDQ $4, end + CMPQ p, end JGE finalize -singlesLoop: - MOVBQZX (SI), R12 - ADDQ $1, SI - IMULQ ·prime5v(SB), R12 - XORQ R12, AX +loop1: + MOVBQZX (p), x + ADDQ $1, p + IMULQ ·primes+32(SB), x + XORQ x, h + ROLQ $11, h + IMULQ prime1, h - ROLQ $11, AX - IMULQ R13, AX - - CMPQ SI, BX - JL singlesLoop + CMPQ p, end + JL loop1 finalize: - MOVQ AX, R12 - SHRQ $33, R12 - XORQ R12, AX - IMULQ R14, AX - MOVQ AX, R12 - SHRQ $29, R12 - XORQ R12, AX - IMULQ ·prime3v(SB), AX - MOVQ AX, R12 - SHRQ $32, R12 - XORQ R12, AX - - MOVQ AX, ret+24(FP) + MOVQ h, x + SHRQ $33, x + XORQ x, h + IMULQ prime2, h + MOVQ h, x + SHRQ $29, x + XORQ x, h + IMULQ ·primes+16(SB), h + MOVQ h, x + SHRQ $32, x + XORQ x, h + + MOVQ h, ret+24(FP) RET -// writeBlocks uses the same registers as above except that it uses AX to store -// the d pointer. - // func writeBlocks(d *Digest, b []byte) int -TEXT ·writeBlocks(SB), NOSPLIT, $0-40 +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 // Load fixed primes needed for round. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 // Load slice. - MOVQ b_base+8(FP), SI - MOVQ b_len+16(FP), DX - LEAQ (SI)(DX*1), BX - SUBQ $32, BX + MOVQ b_base+8(FP), p + MOVQ b_len+16(FP), n + LEAQ (p)(n*1), end + SUBQ $32, end // Load vN from d. - MOVQ d+0(FP), AX - MOVQ 0(AX), R8 // v1 - MOVQ 8(AX), R9 // v2 - MOVQ 16(AX), R10 // v3 - MOVQ 24(AX), R11 // v4 + MOVQ s+0(FP), d + MOVQ 0(d), v1 + MOVQ 8(d), v2 + MOVQ 16(d), v3 + MOVQ 24(d), v4 // We don't need to check the loop condition here; this function is // always called with at least one block of data to process. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ SI, BX - JLE blockLoop + blockLoop() // Copy vN back to d. - MOVQ R8, 0(AX) - MOVQ R9, 8(AX) - MOVQ R10, 16(AX) - MOVQ R11, 24(AX) - - // The number of bytes written is SI minus the old base pointer. - SUBQ b_base+8(FP), SI - MOVQ SI, ret+32(FP) + MOVQ v1, 0(d) + MOVQ v2, 8(d) + MOVQ v3, 16(d) + MOVQ v4, 24(d) + + // The number of bytes written is p minus the old base pointer. + SUBQ b_base+8(FP), p + MOVQ p, ret+32(FP) RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s new file mode 100644 index 00000000..17901e08 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -0,0 +1,184 @@ +//go:build !appengine && gc && !purego && !noasm +// +build !appengine +// +build gc +// +build !purego +// +build !noasm + +#include "textflag.h" + +// Registers: +#define digest R1 +#define h R2 // return value +#define p R3 // input pointer +#define n R4 // input length +#define nblocks R5 // n / 32 +#define prime1 R7 +#define prime2 R8 +#define prime3 R9 +#define prime4 R10 +#define prime5 R11 +#define v1 R12 +#define v2 R13 +#define v3 R14 +#define v4 R15 +#define x1 R20 +#define x2 R21 +#define x3 R22 +#define x4 R23 + +#define round(acc, x) \ + MADD prime2, acc, x, acc \ + ROR $64-31, acc \ + MUL prime1, acc + +// round0 performs the operation x = round(0, x). +#define round0(x) \ + MUL prime2, x \ + ROR $64-31, x \ + MUL prime1, x + +#define mergeRound(acc, x) \ + round0(x) \ + EOR x, acc \ + MADD acc, prime4, prime1, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that n >= 32. +#define blockLoop() \ + LSR $5, n, nblocks \ + PCALIGN $16 \ + loop: \ + LDP.P 16(p), (x1, x2) \ + LDP.P 16(p), (x3, x4) \ + round(v1, x1) \ + round(v2, x2) \ + round(v3, x3) \ + round(v4, x4) \ + SUB $1, nblocks \ + CBNZ nblocks, loop + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 + LDP b_base+0(FP), (p, n) + + LDP ·primes+0(SB), (prime1, prime2) + LDP ·primes+16(SB), (prime3, prime4) + MOVD ·primes+32(SB), prime5 + + CMP $32, n + CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } + BLT afterLoop + + ADD prime1, prime2, v1 + MOVD prime2, v2 + MOVD $0, v3 + NEG prime1, v4 + + blockLoop() + + ROR $64-1, v1, x1 + ROR $64-7, v2, x2 + ADD x1, x2 + ROR $64-12, v3, x3 + ROR $64-18, v4, x4 + ADD x3, x4 + ADD x2, x4, h + + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) + +afterLoop: + ADD n, h + + TBZ $4, n, try8 + LDP.P 16(p), (x1, x2) + + round0(x1) + + // NOTE: here and below, sequencing the EOR after the ROR (using a + // rotated register) is worth a small but measurable speedup for small + // inputs. + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + + round0(x2) + ROR $64-27, h + EOR x2 @> 64-27, h, h + MADD h, prime4, prime1, h + +try8: + TBZ $3, n, try4 + MOVD.P 8(p), x1 + + round0(x1) + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + +try4: + TBZ $2, n, try2 + MOVWU.P 4(p), x2 + + MUL prime1, x2 + ROR $64-23, h + EOR x2 @> 64-23, h, h + MADD h, prime3, prime2, h + +try2: + TBZ $1, n, try1 + MOVHU.P 2(p), x3 + AND $255, x3, x1 + LSR $8, x3, x2 + + MUL prime5, x1 + ROR $64-11, h + EOR x1 @> 64-11, h, h + MUL prime1, h + + MUL prime5, x2 + ROR $64-11, h + EOR x2 @> 64-11, h, h + MUL prime1, h + +try1: + TBZ $0, n, finalize + MOVBU (p), x4 + + MUL prime5, x4 + ROR $64-11, h + EOR x4 @> 64-11, h, h + MUL prime1, h + +finalize: + EOR h >> 33, h + MUL prime2, h + EOR h >> 29, h + MUL prime3, h + EOR h >> 32, h + + MOVD h, ret+24(FP) + RET + +// func writeBlocks(d *Digest, b []byte) int +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 + LDP ·primes+0(SB), (prime1, prime2) + + // Load state. Assume v[1-4] are stored contiguously. + MOVD d+0(FP), digest + LDP 0(digest), (v1, v2) + LDP 16(digest), (v3, v4) + + LDP b_base+8(FP), (p, n) + + blockLoop() + + // Store updated state. + STP (v1, v2), 0(digest) + STP (v3, v4), 16(digest) + + BIC $31, n + MOVD n, ret+32(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go new file mode 100644 index 00000000..d4221edf --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go @@ -0,0 +1,16 @@ +//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm +// +build amd64 arm64 +// +build !appengine +// +build gc +// +build !purego +// +build !noasm + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +// +//go:noescape +func Sum64(b []byte) uint64 + +//go:noescape +func writeBlocks(s *Digest, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go index 1f52f296..0be16cef 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go @@ -1,5 +1,5 @@ -//go:build !amd64 || appengine || !gc || purego -// +build !amd64 appengine !gc purego +//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm +// +build !amd64,!arm64 appengine !gc purego noasm package xxhash @@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 { var h uint64 if n >= 32 { - v1 := prime1v + prime2 + v1 := primes[0] + prime2 v2 := prime2 v3 := uint64(0) - v4 := -prime1v + v4 := -primes[0] for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) @@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 { h += uint64(n) - i, end := 0, len(b) - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(b[i:i+8:len(b)])) + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } - if i+4 <= end { - h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 - i += 4 + b = b[4:] } - for ; i < end; i++ { - h ^= uint64(b[i]) * prime5 + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 } diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go new file mode 100644 index 00000000..f41932b7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.go @@ -0,0 +1,16 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +// matchLen returns how many bytes match in a and b +// +// It assumes that: +// +// len(a) <= len(b) and len(a) > 0 +// +//go:noescape +func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s new file mode 100644 index 00000000..9a7655c0 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s @@ -0,0 +1,68 @@ +// Copied from S2 implementation. + +//go:build !appengine && !noasm && gc && !noasm + +#include "textflag.h" + +// func matchLen(a []byte, b []byte) int +// Requires: BMI +TEXT ·matchLen(SB), NOSPLIT, $0-56 + MOVQ a_base+0(FP), AX + MOVQ b_base+24(FP), CX + MOVQ a_len+8(FP), DX + + // matchLen + XORL SI, SI + CMPL DX, $0x08 + JB matchlen_match4_standalone + +matchlen_loopback_standalone: + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + TESTQ BX, BX + JZ matchlen_loop_standalone + +#ifdef GOAMD64_v3 + TZCNTQ BX, BX +#else + BSFQ BX, BX +#endif + SARQ $0x03, BX + LEAL (SI)(BX*1), SI + JMP gen_match_len_end + +matchlen_loop_standalone: + LEAL -8(DX), DX + LEAL 8(SI), SI + CMPL DX, $0x08 + JAE matchlen_loopback_standalone + +matchlen_match4_standalone: + CMPL DX, $0x04 + JB matchlen_match2_standalone + MOVL (AX)(SI*1), BX + CMPL (CX)(SI*1), BX + JNE matchlen_match2_standalone + LEAL -4(DX), DX + LEAL 4(SI), SI + +matchlen_match2_standalone: + CMPL DX, $0x02 + JB matchlen_match1_standalone + MOVW (AX)(SI*1), BX + CMPW (CX)(SI*1), BX + JNE matchlen_match1_standalone + LEAL -2(DX), DX + LEAL 2(SI), SI + +matchlen_match1_standalone: + CMPL DX, $0x01 + JB gen_match_len_end + MOVB (AX)(SI*1), BL + CMPB (CX)(SI*1), BL + JNE gen_match_len_end + INCL SI + +gen_match_len_end: + MOVQ SI, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go new file mode 100644 index 00000000..57b9c31c --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_generic.go @@ -0,0 +1,33 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package zstd + +import ( + "encoding/binary" + "math/bits" +) + +// matchLen returns the maximum common prefix length of a and b. +// a must be the shortest of the two. +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + } + + for i := range a { + if a[i] != b[i] { + break + } + n++ + } + return n + +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 1dd39e63..9405fcf1 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -20,6 +20,10 @@ type seq struct { llCode, mlCode, ofCode uint8 } +type seqVals struct { + ll, ml, mo int +} + func (s seq) String() string { if s.offset <= 3 { if s.offset == 0 { @@ -61,16 +65,19 @@ type sequenceDecs struct { offsets sequenceDec matchLengths sequenceDec prevOffset [3]int - hist []byte dict []byte literals []byte out []byte + nSeqs int + br *bitReader + seqSize int windowSize int maxBits uint8 + maxSyncLen uint64 } // initialize all 3 decoders from the stream input. -func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []byte) error { +func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error { if err := s.litLengths.init(br); err != nil { return errors.New("litLengths:" + err.Error()) } @@ -80,8 +87,7 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] if err := s.matchLengths.init(br); err != nil { return errors.New("matchLengths:" + err.Error()) } - s.literals = literals - s.hist = hist.b + s.br = br s.prevOffset = hist.recentOffsets s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits s.windowSize = hist.windowSize @@ -93,16 +99,149 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out [] return nil } +func (s *sequenceDecs) freeDecoders() { + if f := s.litLengths.fse; f != nil && !f.preDefined { + fseDecoderPool.Put(f) + s.litLengths.fse = nil + } + if f := s.offsets.fse; f != nil && !f.preDefined { + fseDecoderPool.Put(f) + s.offsets.fse = nil + } + if f := s.matchLengths.fse; f != nil && !f.preDefined { + fseDecoderPool.Put(f) + s.matchLengths.fse = nil + } +} + +// execute will execute the decoded sequence with the provided history. +// The sequence must be evaluated before being sent. +func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { + if len(s.dict) == 0 { + return s.executeSimple(seqs, hist) + } + + // Ensure we have enough output size... + if len(s.out)+s.seqSize > cap(s.out) { + addBytes := s.seqSize + len(s.out) + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + for _, seq := range seqs { + // Add literals + copy(out[t:], s.literals[:seq.ll]) + t += seq.ll + s.literals = s.literals[seq.ll:] + + // Copy from dictionary... + if seq.mo > t+len(hist) || seq.mo > s.windowSize { + if len(s.dict) == 0 { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) + } + + // we may be in dictionary. + dictO := len(s.dict) - (seq.mo - (t + len(hist))) + if dictO < 0 || dictO >= len(s.dict) { + return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict)) + } + end := dictO + seq.ml + if end > len(s.dict) { + n := len(s.dict) - dictO + copy(out[t:], s.dict[dictO:]) + t += n + seq.ml -= n + } else { + copy(out[t:], s.dict[dictO:end]) + t += end - dictO + continue + } + } + + // Copy from history. + if v := seq.mo - t; v > 0 { + // v is the start position in history from end. + start := len(hist) - v + if seq.ml > v { + // Some goes into current block. + // Copy remainder of history + copy(out[t:], hist[start:]) + t += v + seq.ml -= v + } else { + copy(out[t:], hist[start:start+seq.ml]) + t += seq.ml + continue + } + } + // We must be in current buffer now + if seq.ml > 0 { + start := t - seq.mo + if seq.ml <= t-start { + // No overlap + copy(out[t:], out[start:start+seq.ml]) + t += seq.ml + continue + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) + // Destination is the space we just added. + for i := range src { + dst[i] = src[i] + } + } + } + } + + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} + // decode sequences from the stream with the provided history. -func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { +func (s *sequenceDecs) decodeSync(hist []byte) error { + supported, err := s.decodeSyncSimple(hist) + if supported { + return err + } + + br := s.br + seqs := s.nSeqs startSize := len(s.out) // Grab full sizes tables, to avoid bounds checks. llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + out := s.out + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + if debugDecoder { + println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream") + } for i := seqs - 1; i >= 0; i-- { if br.overread() { - printf("reading sequence %d, exceeded available data\n", seqs-i) + printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain()) return io.ErrUnexpectedEOF } var ll, mo, ml int @@ -151,7 +290,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { if temp == 0 { // 0 is not valid; input is corrupted; force offset to 1 - println("temp was 0") + println("WARNING: temp was 0") temp = 1 } @@ -176,51 +315,49 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { if ll > len(s.literals) { return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals)) } - size := ll + ml + len(s.out) + size := ll + ml + len(out) if size-startSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size", size) + return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) } - if size > cap(s.out) { + if size > cap(out) { // Not enough size, which can happen under high volume block streaming conditions // but could be if destination slice is too small for sync operations. // over-allocating here can create a large amount of GC pressure so we try to keep // it as contained as possible - used := len(s.out) - startSize + used := len(out) - startSize addBytes := 256 + ll + ml + used>>2 // Clamp to max block size. if used+addBytes > maxBlockSize { addBytes = maxBlockSize - used } - s.out = append(s.out, make([]byte, addBytes)...) - s.out = s.out[:len(s.out)-addBytes] + out = append(out, make([]byte, addBytes)...) + out = out[:len(out)-addBytes] } if ml > maxMatchLen { return fmt.Errorf("match len (%d) bigger than max allowed length", ml) } // Add literals - s.out = append(s.out, s.literals[:ll]...) + out = append(out, s.literals[:ll]...) s.literals = s.literals[ll:] - out := s.out if mo == 0 && ml > 0 { return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) } - if mo > len(s.out)+len(hist) || mo > s.windowSize { + if mo > len(out)+len(hist) || mo > s.windowSize { if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } // we may be in dictionary. - dictO := len(s.dict) - (mo - (len(s.out) + len(hist))) + dictO := len(s.dict) - (mo - (len(out) + len(hist))) if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } end := dictO + ml if end > len(s.dict) { out = append(out, s.dict[dictO:]...) - mo -= len(s.dict) - dictO ml -= len(s.dict) - dictO } else { out = append(out, s.dict[dictO:end]...) @@ -231,26 +368,25 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { // Copy from history. // TODO: Blocks without history could be made to ignore this completely. - if v := mo - len(s.out); v > 0 { + if v := mo - len(out); v > 0 { // v is the start position in history from end. - start := len(s.hist) - v + start := len(hist) - v if ml > v { // Some goes into current block. // Copy remainder of history - out = append(out, s.hist[start:]...) - mo -= v + out = append(out, hist[start:]...) ml -= v } else { - out = append(out, s.hist[start:start+ml]...) + out = append(out, hist[start:start+ml]...) ml = 0 } } // We must be in current buffer now if ml > 0 { - start := len(s.out) - mo - if ml <= len(s.out)-start { + start := len(out) - mo + if ml <= len(out)-start { // No overlap - out = append(out, s.out[start:start+ml]...) + out = append(out, out[start:start+ml]...) } else { // Overlapping copy // Extend destination slice and copy one byte at the time. @@ -264,7 +400,6 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { } } } - s.out = out if i == 0 { // This is the last sequence, so we shouldn't update state. break @@ -278,7 +413,8 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { mlState = mlTable[mlState.newState()&maxTableMask] ofState = ofTable[ofState.newState()&maxTableMask] } else { - bits := br.getBitsFast(nBits) + bits := br.get32BitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) llState = llTable[(llState.newState()+lowBits)&maxTableMask] @@ -291,19 +427,13 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { } } - // Add final literals - s.out = append(s.out, s.literals...) - return nil -} + if size := len(s.literals) + len(out) - startSize; size > maxBlockSize { + return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) + } -// update states, at least 27 bits must be available. -func (s *sequenceDecs) update(br *bitReader) { - // Max 8 bits - s.litLengths.state.next(br) - // Max 9 bits - s.matchLengths.state.next(br) - // Max 8 bits - s.offsets.state.next(br) + // Add final literals + s.out = append(out, s.literals...) + return br.close() } var bitMask [16]uint16 @@ -314,87 +444,6 @@ func init() { } } -// update states, at least 27 bits must be available. -func (s *sequenceDecs) updateAlt(br *bitReader) { - // Update all 3 states at once. Approx 20% faster. - a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - - nBits := a.nbBits() + b.nbBits() + c.nbBits() - if nBits == 0 { - s.litLengths.state.state = s.litLengths.state.dt[a.newState()] - s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()] - s.offsets.state.state = s.offsets.state.dt[c.newState()] - return - } - bits := br.getBitsFast(nBits) - lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31)) - s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits] - - lowBits = uint16(bits >> (c.nbBits() & 31)) - lowBits &= bitMask[b.nbBits()&15] - s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits] - - lowBits = uint16(bits) & bitMask[c.nbBits()&15] - s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits] -} - -// nextFast will return new states when there are at least 4 unused bytes left on the stream when done. -func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { - // Final will not read from stream. - ll, llB := llState.final() - ml, mlB := mlState.final() - mo, moB := ofState.final() - - // extra bits are stored in reverse order. - br.fillFast() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fillFast() - } - ml += br.getBits(mlB) - ll += br.getBits(llB) - - if moB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = mo - return - } - // mo = s.adjustOffset(mo, ll, moB) - // Inlined for rather big speedup - if ll == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - mo++ - } - - if mo == 0 { - mo = s.prevOffset[0] - return - } - var temp int - if mo == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[mo] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("temp was 0") - temp = 1 - } - - if mo != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - mo = temp - return -} - func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { // Final will not read from stream. ll, llB := llState.final() @@ -457,36 +506,3 @@ func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int { s.prevOffset[0] = temp return temp } - -// mergeHistory will merge history. -func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) { - for i := uint(0); i < 3; i++ { - var sNew, sHist *sequenceDec - switch i { - default: - // same as "case 0": - sNew = &s.litLengths - sHist = &hist.litLengths - case 1: - sNew = &s.offsets - sHist = &hist.offsets - case 2: - sNew = &s.matchLengths - sHist = &hist.matchLengths - } - if sNew.repeat { - if sHist.fse == nil { - return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i) - } - continue - } - if sNew.fse == nil { - return nil, fmt.Errorf("sequence stream %d, no fse found", i) - } - if sHist.fse != nil && !sHist.fse.preDefined { - fseDecoderPool.Put(sHist.fse) - } - sHist.fse = sNew.fse - } - return hist, nil -} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go new file mode 100644 index 00000000..8adabd82 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -0,0 +1,394 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package zstd + +import ( + "fmt" + "io" + + "github.com/klauspost/compress/internal/cpuinfo" +) + +type decodeSyncAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + litRemain int + out []byte + outPosition int + literals []byte + litPosition int + history []byte + windowSize int + ll int // set on error (not for all errors, please refer to _generate/gen.go) + ml int // set on error (not for all errors, please refer to _generate/gen.go) + mo int // set on error (not for all errors, please refer to _generate/gen.go) +} + +// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +// +//go:noescape +func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions. +// +//go:noescape +func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer. +// +//go:noescape +func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer. +// +//go:noescape +func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// decode sequences from the stream with the provided history but without a dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + if len(s.dict) > 0 { + return false, nil + } + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize { + return false, nil + } + + // FIXME: Using unsafe memory copies leads to rare, random crashes + // with fuzz testing. It is therefore disabled for now. + const useSafe = true + /* + useSafe := false + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { + useSafe = true + } + if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { + useSafe = true + } + if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { + useSafe = true + } + */ + + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeSyncAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + iteration: s.nSeqs - 1, + litRemain: len(s.literals), + out: s.out, + outPosition: len(s.out), + literals: s.literals, + windowSize: s.windowSize, + history: hist, + } + + s.seqSize = 0 + startSize := len(s.out) + + var errCode int + if cpuinfo.HasBMI2() { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx) + } + } else { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx) + } + } + switch errCode { + case noError: + break + + case errorMatchLenOfsMismatch: + return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml) + + case errorMatchLenTooBig: + return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml) + + case errorMatchOffTooBig: + return true, fmt.Errorf("match offset (%d) bigger than current history (%d)", + ctx.mo, ctx.outPosition+len(hist)-startSize) + + case errorNotEnoughLiterals: + return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", + ctx.ll, ctx.litRemain+ctx.ll) + + case errorOverread: + return true, io.ErrUnexpectedEOF + + case errorNotEnoughSpace: + size := ctx.outPosition + ctx.ll + ctx.ml + if debugDecoder { + println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize) + } + return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) + + default: + return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + return true, err + } + + s.literals = s.literals[ctx.litPosition:] + t := ctx.outPosition + s.out = s.out[:t] + + // Add final literals + s.out = append(s.out, s.literals...) + if debugDecoder { + t += len(s.literals) + if t != len(s.out) { + panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t)) + } + } + + return true, nil +} + +// -------------------------------------------------------------------------------- + +type decodeAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + seqs []seqVals + litRemain int +} + +const noError = 0 + +// error reported when mo == 0 && ml > 0 +const errorMatchLenOfsMismatch = 1 + +// error reported when ml > maxMatchLen +const errorMatchLenTooBig = 2 + +// error reported when mo > available history or mo > s.windowSize +const errorMatchOffTooBig = 3 + +// error reported when the sum of literal lengths exeeceds the literal buffer size +const errorNotEnoughLiterals = 4 + +// error reported when capacity of `out` is too small +const errorNotEnoughSpace = 5 + +// error reported when bits are overread. +const errorOverread = 6 + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +// +//go:noescape +func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +// +//go:noescape +func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +// +//go:noescape +func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +// +//go:noescape +func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + seqs: seqs, + iteration: len(seqs) - 1, + litRemain: len(s.literals), + } + + if debugDecoder { + println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream") + } + + s.seqSize = 0 + lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56 + var errCode int + if cpuinfo.HasBMI2() { + if lte56bits { + errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_bmi2(s, br, &ctx) + } + } else { + if lte56bits { + errCode = sequenceDecs_decode_56_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_amd64(s, br, &ctx) + } + } + if errCode != 0 { + i := len(seqs) - ctx.iteration - 1 + switch errCode { + case errorMatchLenOfsMismatch: + ml := ctx.seqs[i].ml + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + + case errorMatchLenTooBig: + ml := ctx.seqs[i].ml + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + + case errorNotEnoughLiterals: + ll := ctx.seqs[i].ll + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll) + case errorOverread: + return io.ErrUnexpectedEOF + } + + return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + } + + if ctx.litRemain < 0 { + return fmt.Errorf("literal count is too big: total available %d, total requested %d", + len(s.literals), len(s.literals)-ctx.litRemain) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) + } + if debugDecoder { + println("decode: ", br.remain(), "bits remain on stream. code:", errCode) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// -------------------------------------------------------------------------------- + +type executeAsmContext struct { + seqs []seqVals + seqIndex int + out []byte + history []byte + literals []byte + outPosition int + litPosition int + windowSize int +} + +// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm. +// +// Returns false if a match offset is too big. +// +// Please refer to seqdec_generic.go for the reference implementation. +// +//go:noescape +func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool + +// Same as above, but with safe memcopies +// +//go:noescape +func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool + +// executeSimple handles cases when dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) { + addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + ctx := executeAsmContext{ + seqs: seqs, + seqIndex: 0, + out: out, + history: hist, + outPosition: t, + litPosition: 0, + literals: s.literals, + windowSize: s.windowSize, + } + var ok bool + if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc { + ok = sequenceDecs_executeSimple_safe_amd64(&ctx) + } else { + ok = sequenceDecs_executeSimple_amd64(&ctx) + } + if !ok { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", + seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist)) + } + s.literals = s.literals[ctx.litPosition:] + t = ctx.outPosition + + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s new file mode 100644 index 00000000..b6f4ba6f --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -0,0 +1,4175 @@ +// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !noasm + +// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_end + +sequenceDecs_decode_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_byte_by_byte + +sequenceDecs_decode_amd64_fill_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decode_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_of_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_of_update_zero: + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_ml_update_zero: + MOVQ AX, 8(R10) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_2_end + +sequenceDecs_decode_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_2_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte + +sequenceDecs_decode_amd64_fill_2_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decode_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_amd64_ll_update_zero: + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + LEAQ (BX)(R14*1), CX + MOVQ DX, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 + ADDQ R15, DI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + LEAQ (BX)(R14*1), CX + MOVQ DX, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 + ADDQ R15, R8 + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + LEAQ (BX)(R14*1), CX + MOVQ DX, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 + ADDQ R15, R9 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_amd64_after_adjust + +sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_amd64_adjust_offset_nonzero + +sequenceDecs_decode_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_amd64_after_adjust + +sequenceDecs_decode_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_amd64_adjust_zero + JEQ sequenceDecs_decode_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_amd64_adjust_three + JMP sequenceDecs_decode_amd64_adjust_two + +sequenceDecs_decode_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_amd64_after_adjust: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + +// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_56_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_56_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_56_amd64_fill_end + +sequenceDecs_decode_56_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_56_amd64_fill_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decode_56_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte + +sequenceDecs_decode_56_amd64_fill_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decode_56_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_of_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_of_update_zero: + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_ml_update_zero: + MOVQ AX, 8(R10) + + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decode_56_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decode_56_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decode_56_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R15 + ADDQ R15, AX + +sequenceDecs_decode_56_amd64_ll_update_zero: + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + LEAQ (BX)(R14*1), CX + MOVQ DX, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 + ADDQ R15, DI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + LEAQ (BX)(R14*1), CX + MOVQ DX, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 + ADDQ R15, R8 + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + LEAQ (BX)(R14*1), CX + MOVQ DX, R15 + MOVQ CX, BX + ROLQ CL, R15 + MOVL $0x00000001, BP + MOVB R14, CL + SHLL CL, BP + DECL BP + ANDQ BP, R15 + ADDQ R15, R9 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_56_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_56_amd64_after_adjust + +sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_56_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_amd64_adjust_offset_nonzero + +sequenceDecs_decode_56_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_56_amd64_after_adjust + +sequenceDecs_decode_56_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_amd64_adjust_zero + JEQ sequenceDecs_decode_56_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_amd64_adjust_three + JMP sequenceDecs_decode_56_amd64_adjust_two + +sequenceDecs_decode_56_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_56_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_56_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_56_amd64_after_adjust: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_56_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_56_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + +// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_end + +sequenceDecs_decode_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_byte_by_byte + +sequenceDecs_decode_bmi2_fill_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decode_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_2_end + +sequenceDecs_decode_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_2_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte + +sequenceDecs_decode_bmi2_fill_2_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decode_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_bmi2_skip_update + LEAQ (SI)(DI*1), R14 + ADDQ R8, R14 + MOVBQZX R14, R14 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + + // Update Offset State + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Match Length State + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, DI, DI + ADDQ CX, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Literal Length State + BZHIQ SI, R15, CX + MOVQ $0x00001010, R14 + BEXTRQ R14, SI, SI + ADDQ CX, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + +sequenceDecs_decode_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_bmi2_after_adjust + +sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_bmi2_after_adjust + +sequenceDecs_decode_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_bmi2_adjust_zero + JEQ sequenceDecs_decode_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_bmi2_adjust_three + JMP sequenceDecs_decode_bmi2_adjust_two + +sequenceDecs_decode_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_bmi2_after_adjust: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + +// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_56_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_56_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_56_bmi2_fill_end + +sequenceDecs_decode_56_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_56_bmi2_fill_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decode_56_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte + +sequenceDecs_decode_56_bmi2_fill_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decode_56_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_bmi2_skip_update + LEAQ (SI)(DI*1), R14 + ADDQ R8, R14 + MOVBQZX R14, R14 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + + // Update Offset State + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Match Length State + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + MOVQ $0x00001010, R14 + BEXTRQ R14, DI, DI + ADDQ CX, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Literal Length State + BZHIQ SI, R15, CX + MOVQ $0x00001010, R14 + BEXTRQ R14, SI, SI + ADDQ CX, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + +sequenceDecs_decode_56_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_56_bmi2_after_adjust + +sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_56_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_56_bmi2_after_adjust + +sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_bmi2_adjust_zero + JEQ sequenceDecs_decode_56_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_bmi2_adjust_three + JMP sequenceDecs_decode_56_bmi2_adjust_two + +sequenceDecs_decode_56_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_56_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_56_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_56_bmi2_after_adjust: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_56_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_56_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + +// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool +// Requires: SSE +TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 + MOVQ ctx+0(FP), R10 + MOVQ 8(R10), CX + TESTQ CX, CX + JZ empty_seqs + MOVQ (R10), AX + MOVQ 24(R10), DX + MOVQ 32(R10), BX + MOVQ 80(R10), SI + MOVQ 104(R10), DI + MOVQ 120(R10), R8 + MOVQ 56(R10), R9 + MOVQ 64(R10), R10 + ADDQ R10, R9 + + // seqsBase += 24 * seqIndex + LEAQ (DX)(DX*2), R11 + SHLQ $0x03, R11 + ADDQ R11, AX + + // outBase += outPosition + ADDQ DI, BX + +main_loop: + MOVQ (AX), R11 + MOVQ 16(AX), R12 + MOVQ 8(AX), R13 + + // Copy literals + TESTQ R11, R11 + JZ check_offset + XORQ R14, R14 + +copy_1: + MOVUPS (SI)(R14*1), X0 + MOVUPS X0, (BX)(R14*1) + ADDQ $0x10, R14 + CMPQ R14, R11 + JB copy_1 + ADDQ R11, SI + ADDQ R11, BX + ADDQ R11, DI + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + LEAQ (DI)(R10*1), R11 + CMPQ R12, R11 + JG error_match_off_too_big + CMPQ R12, R8 + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JG copy_all_from_history + MOVQ R13, R11 + SUBQ $0x10, R11 + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R11 + JAE copy_4_loop + LEAQ 16(R14)(R11*1), R14 + LEAQ 16(BX)(R11*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), R11 + MOVB 2(R14), R12 + MOVW R11, (BX) + MOVB R12, 2(BX) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), R11 + MOVL -4(R14)(R13*1), R12 + MOVL R11, (BX) + MOVL R12, -4(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), R11 + MOVQ -8(R14)(R13*1), R12 + MOVQ R11, (BX) + MOVQ R12, -8(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX + +copy_4_end: + ADDQ R13, DI + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + JMP loop_finished + +copy_all_from_history: + MOVQ R11, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(BX)(R15*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_5_end + +copy_5_small: + CMPQ R11, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ R11, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(R11*1), BP + MOVB R15, (BX) + MOVB BP, -1(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (BX) + MOVB BP, 2(BX) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(R11*1), BP + MOVL R15, (BX) + MOVL BP, -4(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(R11*1), BP + MOVQ R15, (BX) + MOVQ BP, -8(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + +copy_5_end: + ADDQ R11, DI + SUBQ R11, R13 + + // Copy match from the current buffer +copy_match: + MOVQ BX, R11 + SUBQ R12, R11 + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, DI + MOVQ BX, R12 + ADDQ R13, BX + +copy_2: + MOVUPS (R11), X0 + MOVUPS X0, (R12) + ADDQ $0x10, R11 + ADDQ $0x10, R12 + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, DI + +copy_slow_3: + MOVB (R11), R12 + MOVB R12, (BX) + INCQ R11 + INCQ BX + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + +loop_finished: + // Return value + MOVB $0x01, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + SUBQ 80(AX), SI + MOVQ SI, 112(AX) + RET + +error_match_off_too_big: + // Return value + MOVB $0x00, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + SUBQ 80(AX), SI + MOVQ SI, 112(AX) + RET + +empty_seqs: + // Return value + MOVB $0x01, ret+8(FP) + RET + +// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool +// Requires: SSE +TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9 + MOVQ ctx+0(FP), R10 + MOVQ 8(R10), CX + TESTQ CX, CX + JZ empty_seqs + MOVQ (R10), AX + MOVQ 24(R10), DX + MOVQ 32(R10), BX + MOVQ 80(R10), SI + MOVQ 104(R10), DI + MOVQ 120(R10), R8 + MOVQ 56(R10), R9 + MOVQ 64(R10), R10 + ADDQ R10, R9 + + // seqsBase += 24 * seqIndex + LEAQ (DX)(DX*2), R11 + SHLQ $0x03, R11 + ADDQ R11, AX + + // outBase += outPosition + ADDQ DI, BX + +main_loop: + MOVQ (AX), R11 + MOVQ 16(AX), R12 + MOVQ 8(AX), R13 + + // Copy literals + TESTQ R11, R11 + JZ check_offset + MOVQ R11, R14 + SUBQ $0x10, R14 + JB copy_1_small + +copy_1_loop: + MOVUPS (SI), X0 + MOVUPS X0, (BX) + ADDQ $0x10, SI + ADDQ $0x10, BX + SUBQ $0x10, R14 + JAE copy_1_loop + LEAQ 16(SI)(R14*1), SI + LEAQ 16(BX)(R14*1), BX + MOVUPS -16(SI), X0 + MOVUPS X0, -16(BX) + JMP copy_1_end + +copy_1_small: + CMPQ R11, $0x03 + JE copy_1_move_3 + JB copy_1_move_1or2 + CMPQ R11, $0x08 + JB copy_1_move_4through7 + JMP copy_1_move_8through16 + +copy_1_move_1or2: + MOVB (SI), R14 + MOVB -1(SI)(R11*1), R15 + MOVB R14, (BX) + MOVB R15, -1(BX)(R11*1) + ADDQ R11, SI + ADDQ R11, BX + JMP copy_1_end + +copy_1_move_3: + MOVW (SI), R14 + MOVB 2(SI), R15 + MOVW R14, (BX) + MOVB R15, 2(BX) + ADDQ R11, SI + ADDQ R11, BX + JMP copy_1_end + +copy_1_move_4through7: + MOVL (SI), R14 + MOVL -4(SI)(R11*1), R15 + MOVL R14, (BX) + MOVL R15, -4(BX)(R11*1) + ADDQ R11, SI + ADDQ R11, BX + JMP copy_1_end + +copy_1_move_8through16: + MOVQ (SI), R14 + MOVQ -8(SI)(R11*1), R15 + MOVQ R14, (BX) + MOVQ R15, -8(BX)(R11*1) + ADDQ R11, SI + ADDQ R11, BX + +copy_1_end: + ADDQ R11, DI + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + LEAQ (DI)(R10*1), R11 + CMPQ R12, R11 + JG error_match_off_too_big + CMPQ R12, R8 + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JG copy_all_from_history + MOVQ R13, R11 + SUBQ $0x10, R11 + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R11 + JAE copy_4_loop + LEAQ 16(R14)(R11*1), R14 + LEAQ 16(BX)(R11*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), R11 + MOVB 2(R14), R12 + MOVW R11, (BX) + MOVB R12, 2(BX) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), R11 + MOVL -4(R14)(R13*1), R12 + MOVL R11, (BX) + MOVL R12, -4(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), R11 + MOVQ -8(R14)(R13*1), R12 + MOVQ R11, (BX) + MOVQ R12, -8(BX)(R13*1) + ADDQ R13, R14 + ADDQ R13, BX + +copy_4_end: + ADDQ R13, DI + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + JMP loop_finished + +copy_all_from_history: + MOVQ R11, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R14 + ADDQ $0x10, BX + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(BX)(R15*1), BX + MOVUPS -16(R14), X0 + MOVUPS X0, -16(BX) + JMP copy_5_end + +copy_5_small: + CMPQ R11, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ R11, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(R11*1), BP + MOVB R15, (BX) + MOVB BP, -1(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (BX) + MOVB BP, 2(BX) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(R11*1), BP + MOVL R15, (BX) + MOVL BP, -4(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(R11*1), BP + MOVQ R15, (BX) + MOVQ BP, -8(BX)(R11*1) + ADDQ R11, R14 + ADDQ R11, BX + +copy_5_end: + ADDQ R11, DI + SUBQ R11, R13 + + // Copy match from the current buffer +copy_match: + MOVQ BX, R11 + SUBQ R12, R11 + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, DI + MOVQ R13, R12 + SUBQ $0x10, R12 + JB copy_2_small + +copy_2_loop: + MOVUPS (R11), X0 + MOVUPS X0, (BX) + ADDQ $0x10, R11 + ADDQ $0x10, BX + SUBQ $0x10, R12 + JAE copy_2_loop + LEAQ 16(R11)(R12*1), R11 + LEAQ 16(BX)(R12*1), BX + MOVUPS -16(R11), X0 + MOVUPS X0, -16(BX) + JMP copy_2_end + +copy_2_small: + CMPQ R13, $0x03 + JE copy_2_move_3 + JB copy_2_move_1or2 + CMPQ R13, $0x08 + JB copy_2_move_4through7 + JMP copy_2_move_8through16 + +copy_2_move_1or2: + MOVB (R11), R12 + MOVB -1(R11)(R13*1), R14 + MOVB R12, (BX) + MOVB R14, -1(BX)(R13*1) + ADDQ R13, R11 + ADDQ R13, BX + JMP copy_2_end + +copy_2_move_3: + MOVW (R11), R12 + MOVB 2(R11), R14 + MOVW R12, (BX) + MOVB R14, 2(BX) + ADDQ R13, R11 + ADDQ R13, BX + JMP copy_2_end + +copy_2_move_4through7: + MOVL (R11), R12 + MOVL -4(R11)(R13*1), R14 + MOVL R12, (BX) + MOVL R14, -4(BX)(R13*1) + ADDQ R13, R11 + ADDQ R13, BX + JMP copy_2_end + +copy_2_move_8through16: + MOVQ (R11), R12 + MOVQ -8(R11)(R13*1), R14 + MOVQ R12, (BX) + MOVQ R14, -8(BX)(R13*1) + ADDQ R13, R11 + ADDQ R13, BX + +copy_2_end: + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, DI + +copy_slow_3: + MOVB (R11), R12 + MOVB R12, (BX) + INCQ R11 + INCQ BX + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + +loop_finished: + // Return value + MOVB $0x01, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + SUBQ 80(AX), SI + MOVQ SI, 112(AX) + RET + +error_match_off_too_big: + // Return value + MOVB $0x00, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + SUBQ 80(AX), SI + MOVQ SI, 112(AX) + RET + +empty_seqs: + // Return value + MOVB $0x01, ret+8(FP) + RET + +// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + XORQ CX, CX + MOVQ CX, 8(SP) + MOVQ CX, 16(SP) + MOVQ CX, 24(SP) + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_end + +sequenceDecs_decodeSync_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_of_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_of_update_zero: + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_ml_update_zero: + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_2_end + +sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_2_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_2_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_amd64_ll_update_zero: + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + LEAQ (BX)(R13*1), CX + MOVQ DX, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 + ADDQ R14, DI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + LEAQ (BX)(R13*1), CX + MOVQ DX, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 + ADDQ R14, R8 + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + LEAQ (BX)(R13*1), CX + MOVQ DX, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 + ADDQ R14, R9 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_amd64_after_adjust + +sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_amd64_after_adjust + +sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + ADDQ 144(CX)(AX*8), R14 + JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_amd64_after_adjust: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JG copy_all_from_history + MOVQ R13, AX + SUBQ $0x10, AX + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, AX + JAE copy_4_loop + LEAQ 16(R14)(AX*1), R14 + LEAQ 16(R10)(AX*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), AX + MOVB 2(R14), CL + MOVW AX, (R10) + MOVB CL, 2(R10) + ADDQ R13, R14 + ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), AX + MOVL -4(R14)(R13*1), CX + MOVL AX, (R10) + MOVL CX, -4(R10)(R13*1) + ADDQ R13, R14 + ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), AX + MOVQ -8(R14)(R13*1), CX + MOVQ AX, (R10) + MOVQ CX, -8(R10)(R13*1) + ADDQ R13, R14 + ADDQ R13, R10 + +copy_4_end: + ADDQ R13, R12 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + MOVQ AX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R10)(R15*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_5_end + +copy_5_small: + CMPQ AX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ AX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(AX*1), BP + MOVB R15, (R10) + MOVB BP, -1(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R10) + MOVB BP, 2(R10) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(AX*1), BP + MOVL R15, (R10) + MOVL BP, -4(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(AX*1), BP + MOVQ R15, (R10) + MOVQ BP, -8(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + +copy_5_end: + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R12 + MOVQ R10, CX + ADDQ R13, R10 + +copy_2: + MOVUPS (AX), X0 + MOVUPS X0, (CX) + ADDQ $0x10, AX + ADDQ $0x10, CX + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R12 + +copy_slow_3: + MOVB (AX), CL + MOVB CL, (R10) + INCQ AX + INCQ R10 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + XORQ R9, R9 + MOVQ R9, 8(SP) + MOVQ R9, 16(SP) + MOVQ R9, 24(SP) + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_end + +sequenceDecs_decodeSync_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_2_end + +sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_2_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_2_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_bmi2_skip_update + LEAQ (SI)(DI*1), R13 + ADDQ R8, R13 + MOVBQZX R13, R13 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + + // Update Offset State + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Match Length State + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, DI, DI + ADDQ CX, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Literal Length State + BZHIQ SI, R14, CX + MOVQ $0x00001010, R13 + BEXTRQ R13, SI, SI + ADDQ CX, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + +sequenceDecs_decodeSync_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_bmi2_after_adjust + +sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_bmi2_after_adjust + +sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + ADDQ 144(CX)(R12*8), R14 + JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_bmi2_after_adjust: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JG copy_all_from_history + MOVQ R13, CX + SUBQ $0x10, CX + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, CX + JAE copy_4_loop + LEAQ 16(R14)(CX*1), R14 + LEAQ 16(R9)(CX*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), CX + MOVB 2(R14), R12 + MOVW CX, (R9) + MOVB R12, 2(R9) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), CX + MOVL -4(R14)(R13*1), R12 + MOVL CX, (R9) + MOVL R12, -4(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), CX + MOVQ -8(R14)(R13*1), R12 + MOVQ CX, (R9) + MOVQ R12, -8(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 + +copy_4_end: + ADDQ R13, R11 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + MOVQ CX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R9)(R15*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_5_end + +copy_5_small: + CMPQ CX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ CX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(CX*1), BP + MOVB R15, (R9) + MOVB BP, -1(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R9) + MOVB BP, 2(R9) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(CX*1), BP + MOVL R15, (R9) + MOVL BP, -4(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(CX*1), BP + MOVQ R15, (R9) + MOVQ BP, -8(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + +copy_5_end: + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R11 + MOVQ R9, R12 + ADDQ R13, R9 + +copy_2: + MOVUPS (CX), X0 + MOVUPS X0, (R12) + ADDQ $0x10, CX + ADDQ $0x10, R12 + SUBQ $0x10, R13 + JHI copy_2 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R11 + +copy_slow_3: + MOVB (CX), R12 + MOVB R12, (R9) + INCQ CX + INCQ R9 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + XORQ CX, CX + MOVQ CX, 8(SP) + MOVQ CX, 16(SP) + MOVQ CX, 24(SP) + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_safe_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_end + +sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_safe_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_of_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_of_update_zero: + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_ml_update_zero: + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_end + +sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread: + CMPQ BX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_safe_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + SHRQ $0x20, AX + TESTQ CX, CX + JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero + ADDQ CX, BX + CMPQ BX, $0x40 + JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero + CMPQ CX, $0x40 + JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero + NEGQ CX + SHRQ CL, R14 + ADDQ R14, AX + +sequenceDecs_decodeSync_safe_amd64_ll_update_zero: + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + LEAQ (BX)(R13*1), CX + MOVQ DX, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 + ADDQ R14, DI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + LEAQ (BX)(R13*1), CX + MOVQ DX, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 + ADDQ R14, R8 + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + LEAQ (BX)(R13*1), CX + MOVQ DX, R14 + MOVQ CX, BX + ROLQ CL, R14 + MOVL $0x00000001, R15 + MOVB R13, CL + SHLL CL, R15 + DECL R15 + ANDQ R15, R14 + ADDQ R14, R9 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_safe_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_amd64_after_adjust + +sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_amd64_after_adjust + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + ADDQ 144(CX)(AX*8), R14 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_safe_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_amd64_after_adjust: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + MOVQ AX, R14 + SUBQ $0x10, R14 + JB copy_1_small + +copy_1_loop: + MOVUPS (R11), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R11 + ADDQ $0x10, R10 + SUBQ $0x10, R14 + JAE copy_1_loop + LEAQ 16(R11)(R14*1), R11 + LEAQ 16(R10)(R14*1), R10 + MOVUPS -16(R11), X0 + MOVUPS X0, -16(R10) + JMP copy_1_end + +copy_1_small: + CMPQ AX, $0x03 + JE copy_1_move_3 + JB copy_1_move_1or2 + CMPQ AX, $0x08 + JB copy_1_move_4through7 + JMP copy_1_move_8through16 + +copy_1_move_1or2: + MOVB (R11), R14 + MOVB -1(R11)(AX*1), R15 + MOVB R14, (R10) + MOVB R15, -1(R10)(AX*1) + ADDQ AX, R11 + ADDQ AX, R10 + JMP copy_1_end + +copy_1_move_3: + MOVW (R11), R14 + MOVB 2(R11), R15 + MOVW R14, (R10) + MOVB R15, 2(R10) + ADDQ AX, R11 + ADDQ AX, R10 + JMP copy_1_end + +copy_1_move_4through7: + MOVL (R11), R14 + MOVL -4(R11)(AX*1), R15 + MOVL R14, (R10) + MOVL R15, -4(R10)(AX*1) + ADDQ AX, R11 + ADDQ AX, R10 + JMP copy_1_end + +copy_1_move_8through16: + MOVQ (R11), R14 + MOVQ -8(R11)(AX*1), R15 + MOVQ R14, (R10) + MOVQ R15, -8(R10)(AX*1) + ADDQ AX, R11 + ADDQ AX, R10 + +copy_1_end: + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JG copy_all_from_history + MOVQ R13, AX + SUBQ $0x10, AX + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, AX + JAE copy_4_loop + LEAQ 16(R14)(AX*1), R14 + LEAQ 16(R10)(AX*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), AX + MOVB 2(R14), CL + MOVW AX, (R10) + MOVB CL, 2(R10) + ADDQ R13, R14 + ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), AX + MOVL -4(R14)(R13*1), CX + MOVL AX, (R10) + MOVL CX, -4(R10)(R13*1) + ADDQ R13, R14 + ADDQ R13, R10 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), AX + MOVQ -8(R14)(R13*1), CX + MOVQ AX, (R10) + MOVQ CX, -8(R10)(R13*1) + ADDQ R13, R14 + ADDQ R13, R10 + +copy_4_end: + ADDQ R13, R12 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + MOVQ AX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R10) + ADDQ $0x10, R14 + ADDQ $0x10, R10 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R10)(R15*1), R10 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R10) + JMP copy_5_end + +copy_5_small: + CMPQ AX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ AX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(AX*1), BP + MOVB R15, (R10) + MOVB BP, -1(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R10) + MOVB BP, 2(R10) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(AX*1), BP + MOVL R15, (R10) + MOVL BP, -4(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(AX*1), BP + MOVQ R15, (R10) + MOVQ BP, -8(R10)(AX*1) + ADDQ AX, R14 + ADDQ AX, R10 + +copy_5_end: + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R12 + MOVQ R13, CX + SUBQ $0x10, CX + JB copy_2_small + +copy_2_loop: + MOVUPS (AX), X0 + MOVUPS X0, (R10) + ADDQ $0x10, AX + ADDQ $0x10, R10 + SUBQ $0x10, CX + JAE copy_2_loop + LEAQ 16(AX)(CX*1), AX + LEAQ 16(R10)(CX*1), R10 + MOVUPS -16(AX), X0 + MOVUPS X0, -16(R10) + JMP copy_2_end + +copy_2_small: + CMPQ R13, $0x03 + JE copy_2_move_3 + JB copy_2_move_1or2 + CMPQ R13, $0x08 + JB copy_2_move_4through7 + JMP copy_2_move_8through16 + +copy_2_move_1or2: + MOVB (AX), CL + MOVB -1(AX)(R13*1), R14 + MOVB CL, (R10) + MOVB R14, -1(R10)(R13*1) + ADDQ R13, AX + ADDQ R13, R10 + JMP copy_2_end + +copy_2_move_3: + MOVW (AX), CX + MOVB 2(AX), R14 + MOVW CX, (R10) + MOVB R14, 2(R10) + ADDQ R13, AX + ADDQ R13, R10 + JMP copy_2_end + +copy_2_move_4through7: + MOVL (AX), CX + MOVL -4(AX)(R13*1), R14 + MOVL CX, (R10) + MOVL R14, -4(R10)(R13*1) + ADDQ R13, AX + ADDQ R13, R10 + JMP copy_2_end + +copy_2_move_8through16: + MOVQ (AX), CX + MOVQ -8(AX)(R13*1), R14 + MOVQ CX, (R10) + MOVQ R14, -8(R10)(R13*1) + ADDQ R13, AX + ADDQ R13, R10 + +copy_2_end: + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R12 + +copy_slow_3: + MOVB (AX), CL + MOVB CL, (R10) + INCQ AX + INCQ R10 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_safe_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + XORQ R9, R9 + MOVQ R9, 8(SP) + MOVQ R9, 16(SP) + MOVQ R9, 24(SP) + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_safe_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_end + +sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_safe_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_end + +sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread: + CMPQ DX, $0x40 + JA error_overread + +sequenceDecs_decodeSync_safe_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_bmi2_skip_update + LEAQ (SI)(DI*1), R13 + ADDQ R8, R13 + MOVBQZX R13, R13 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + + // Update Offset State + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, R8, R8 + ADDQ CX, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Match Length State + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + MOVQ $0x00001010, R13 + BEXTRQ R13, DI, DI + ADDQ CX, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Literal Length State + BZHIQ SI, R14, CX + MOVQ $0x00001010, R13 + BEXTRQ R13, SI, SI + ADDQ CX, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + +sequenceDecs_decodeSync_safe_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust + +sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + ADDQ 144(CX)(R12*8), R14 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_safe_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_bmi2_after_adjust: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + MOVQ CX, R14 + SUBQ $0x10, R14 + JB copy_1_small + +copy_1_loop: + MOVUPS (R10), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R10 + ADDQ $0x10, R9 + SUBQ $0x10, R14 + JAE copy_1_loop + LEAQ 16(R10)(R14*1), R10 + LEAQ 16(R9)(R14*1), R9 + MOVUPS -16(R10), X0 + MOVUPS X0, -16(R9) + JMP copy_1_end + +copy_1_small: + CMPQ CX, $0x03 + JE copy_1_move_3 + JB copy_1_move_1or2 + CMPQ CX, $0x08 + JB copy_1_move_4through7 + JMP copy_1_move_8through16 + +copy_1_move_1or2: + MOVB (R10), R14 + MOVB -1(R10)(CX*1), R15 + MOVB R14, (R9) + MOVB R15, -1(R9)(CX*1) + ADDQ CX, R10 + ADDQ CX, R9 + JMP copy_1_end + +copy_1_move_3: + MOVW (R10), R14 + MOVB 2(R10), R15 + MOVW R14, (R9) + MOVB R15, 2(R9) + ADDQ CX, R10 + ADDQ CX, R9 + JMP copy_1_end + +copy_1_move_4through7: + MOVL (R10), R14 + MOVL -4(R10)(CX*1), R15 + MOVL R14, (R9) + MOVL R15, -4(R9)(CX*1) + ADDQ CX, R10 + ADDQ CX, R9 + JMP copy_1_end + +copy_1_move_8through16: + MOVQ (R10), R14 + MOVQ -8(R10)(CX*1), R15 + MOVQ R14, (R9) + MOVQ R15, -8(R9)(CX*1) + ADDQ CX, R10 + ADDQ CX, R9 + +copy_1_end: + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JG copy_all_from_history + MOVQ R13, CX + SUBQ $0x10, CX + JB copy_4_small + +copy_4_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, CX + JAE copy_4_loop + LEAQ 16(R14)(CX*1), R14 + LEAQ 16(R9)(CX*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_4_end + +copy_4_small: + CMPQ R13, $0x03 + JE copy_4_move_3 + CMPQ R13, $0x08 + JB copy_4_move_4through7 + JMP copy_4_move_8through16 + +copy_4_move_3: + MOVW (R14), CX + MOVB 2(R14), R12 + MOVW CX, (R9) + MOVB R12, 2(R9) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_4through7: + MOVL (R14), CX + MOVL -4(R14)(R13*1), R12 + MOVL CX, (R9) + MOVL R12, -4(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 + JMP copy_4_end + +copy_4_move_8through16: + MOVQ (R14), CX + MOVQ -8(R14)(R13*1), R12 + MOVQ CX, (R9) + MOVQ R12, -8(R9)(R13*1) + ADDQ R13, R14 + ADDQ R13, R9 + +copy_4_end: + ADDQ R13, R11 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + MOVQ CX, R15 + SUBQ $0x10, R15 + JB copy_5_small + +copy_5_loop: + MOVUPS (R14), X0 + MOVUPS X0, (R9) + ADDQ $0x10, R14 + ADDQ $0x10, R9 + SUBQ $0x10, R15 + JAE copy_5_loop + LEAQ 16(R14)(R15*1), R14 + LEAQ 16(R9)(R15*1), R9 + MOVUPS -16(R14), X0 + MOVUPS X0, -16(R9) + JMP copy_5_end + +copy_5_small: + CMPQ CX, $0x03 + JE copy_5_move_3 + JB copy_5_move_1or2 + CMPQ CX, $0x08 + JB copy_5_move_4through7 + JMP copy_5_move_8through16 + +copy_5_move_1or2: + MOVB (R14), R15 + MOVB -1(R14)(CX*1), BP + MOVB R15, (R9) + MOVB BP, -1(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_3: + MOVW (R14), R15 + MOVB 2(R14), BP + MOVW R15, (R9) + MOVB BP, 2(R9) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_4through7: + MOVL (R14), R15 + MOVL -4(R14)(CX*1), BP + MOVL R15, (R9) + MOVL BP, -4(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + JMP copy_5_end + +copy_5_move_8through16: + MOVQ (R14), R15 + MOVQ -8(R14)(CX*1), BP + MOVQ R15, (R9) + MOVQ BP, -8(R9)(CX*1) + ADDQ CX, R14 + ADDQ CX, R9 + +copy_5_end: + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + ADDQ R13, R11 + MOVQ R13, R12 + SUBQ $0x10, R12 + JB copy_2_small + +copy_2_loop: + MOVUPS (CX), X0 + MOVUPS X0, (R9) + ADDQ $0x10, CX + ADDQ $0x10, R9 + SUBQ $0x10, R12 + JAE copy_2_loop + LEAQ 16(CX)(R12*1), CX + LEAQ 16(R9)(R12*1), R9 + MOVUPS -16(CX), X0 + MOVUPS X0, -16(R9) + JMP copy_2_end + +copy_2_small: + CMPQ R13, $0x03 + JE copy_2_move_3 + JB copy_2_move_1or2 + CMPQ R13, $0x08 + JB copy_2_move_4through7 + JMP copy_2_move_8through16 + +copy_2_move_1or2: + MOVB (CX), R12 + MOVB -1(CX)(R13*1), R14 + MOVB R12, (R9) + MOVB R14, -1(R9)(R13*1) + ADDQ R13, CX + ADDQ R13, R9 + JMP copy_2_end + +copy_2_move_3: + MOVW (CX), R12 + MOVB 2(CX), R14 + MOVW R12, (R9) + MOVB R14, 2(R9) + ADDQ R13, CX + ADDQ R13, R9 + JMP copy_2_end + +copy_2_move_4through7: + MOVL (CX), R12 + MOVL -4(CX)(R13*1), R14 + MOVL R12, (R9) + MOVL R14, -4(R9)(R13*1) + ADDQ R13, CX + ADDQ R13, R9 + JMP copy_2_end + +copy_2_move_8through16: + MOVQ (CX), R12 + MOVQ -8(CX)(R13*1), R14 + MOVQ R12, (R9) + MOVQ R14, -8(R9)(R13*1) + ADDQ R13, CX + ADDQ R13, R9 + +copy_2_end: + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + ADDQ R13, R11 + +copy_slow_3: + MOVB (CX), R12 + MOVB R12, (R9) + INCQ CX + INCQ R9 + DECQ R13 + JNZ copy_slow_3 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_safe_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with overread error +error_overread: + MOVQ $0x00000006, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go new file mode 100644 index 00000000..ac2a80d2 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -0,0 +1,237 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +package zstd + +import ( + "fmt" + "io" +) + +// decode sequences from the stream with the provided history but without dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + return false, nil +} + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + // Grab full sizes tables, to avoid bounds checks. + llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] + llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + s.seqSize = 0 + litRemain := len(s.literals) + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + for i := range seqs { + var ll, mo, ml int + if br.off > 4+((maxOffsetBits+16+16)>>3) { + // inlined function: + // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) + + // Final will not read from stream. + var llB, mlB, moB uint8 + ll, llB = llState.final() + ml, mlB = mlState.final() + mo, moB = ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + } else { + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + } else { + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("WARNING: temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + } + } + br.fillFast() + } else { + if br.overread() { + if debugDecoder { + printf("reading sequence %d, exceeded available data\n", i) + } + return io.ErrUnexpectedEOF + } + ll, mo, ml = s.next(br, llState, mlState, ofState) + br.fill() + } + + if debugSequences { + println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) + } + // Evaluate. + // We might be doing this async, so do it early. + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + if ml > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + } + s.seqSize += ll + ml + if s.seqSize > maxBlockSize { + return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) + } + litRemain -= ll + if litRemain < 0 { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) + } + seqs[i] = seqVals{ + ll: ll, + ml: ml, + mo: mo, + } + if i == len(seqs)-1 { + // This is the last sequence, so we shouldn't update state. + break + } + + // Manually inlined, ~ 5-20% faster + // Update all 3 states at once. Approx 20% faster. + nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() + if nBits == 0 { + llState = llTable[llState.newState()&maxTableMask] + mlState = mlTable[mlState.newState()&maxTableMask] + ofState = ofTable[ofState.newState()&maxTableMask] + } else { + bits := br.get32BitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) + llState = llTable[(llState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits >> (ofState.nbBits() & 31)) + lowBits &= bitMask[mlState.nbBits()&15] + mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] + ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] + } + } + s.seqSize += litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// executeSimple handles cases when a dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize > cap(s.out) { + addBytes := s.seqSize + len(s.out) + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + for _, seq := range seqs { + // Add literals + copy(out[t:], s.literals[:seq.ll]) + t += seq.ll + s.literals = s.literals[seq.ll:] + + // Malformed input + if seq.mo > t+len(hist) || seq.mo > s.windowSize { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) + } + + // Copy from history. + if v := seq.mo - t; v > 0 { + // v is the start position in history from end. + start := len(hist) - v + if seq.ml > v { + // Some goes into the current block. + // Copy remainder of history + copy(out[t:], hist[start:]) + t += v + seq.ml -= v + } else { + copy(out[t:], hist[start:start+seq.ml]) + t += seq.ml + continue + } + } + + // We must be in the current buffer now + if seq.ml > 0 { + start := t - seq.mo + if seq.ml <= t-start { + // No overlap + copy(out[t:], out[start:start+seq.ml]) + t += seq.ml + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) + // Destination is the space we just added. + for i := range src { + dst[i] = src[i] + } + } + } + } + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go index 967f29b3..29c15c8c 100644 --- a/vendor/github.com/klauspost/compress/zstd/zip.go +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -18,36 +18,58 @@ const ZipMethodWinZip = 93 // See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT const ZipMethodPKWare = 20 -var zipReaderPool sync.Pool +// zipReaderPool is the default reader pool. +var zipReaderPool = sync.Pool{New: func() interface{} { + z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1)) + if err != nil { + panic(err) + } + return z +}} -// newZipReader cannot be used since we would leak goroutines... -func newZipReader(r io.Reader) io.ReadCloser { - dec, ok := zipReaderPool.Get().(*Decoder) - if ok { - dec.Reset(r) - } else { - d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) - if err != nil { - panic(err) +// newZipReader creates a pooled zip decompressor. +func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser { + pool := &zipReaderPool + if len(opts) > 0 { + opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...) + // Force concurrency 1 + opts = append(opts, WithDecoderConcurrency(1)) + // Create our own pool + pool = &sync.Pool{} + } + return func(r io.Reader) io.ReadCloser { + dec, ok := pool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, opts...) + if err != nil { + panic(err) + } + dec = d } - dec = d + return &pooledZipReader{dec: dec, pool: pool} } - return &pooledZipReader{dec: dec} } type pooledZipReader struct { - mu sync.Mutex // guards Close and Read - dec *Decoder + mu sync.Mutex // guards Close and Read + pool *sync.Pool + dec *Decoder } func (r *pooledZipReader) Read(p []byte) (n int, err error) { r.mu.Lock() defer r.mu.Unlock() if r.dec == nil { - return 0, errors.New("Read after Close") + return 0, errors.New("read after close or EOF") } dec, err := r.dec.Read(p) - + if err == io.EOF { + r.dec.Reset(nil) + r.pool.Put(r.dec) + r.dec = nil + } return dec, err } @@ -57,7 +79,7 @@ func (r *pooledZipReader) Close() error { var err error if r.dec != nil { err = r.dec.Reset(nil) - zipReaderPool.Put(r.dec) + r.pool.Put(r.dec) r.dec = nil } return err @@ -111,12 +133,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { // ZipDecompressor returns a decompressor that can be registered with zip libraries. // See ZipCompressor for example. -func ZipDecompressor() func(r io.Reader) io.ReadCloser { - return func(r io.Reader) io.ReadCloser { - d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) - if err != nil { - panic(err) - } - return d.IOReadCloser() - } +// Options can be specified. WithDecoderConcurrency(1) is forced, +// and by default a 128MB maximum decompression window is specified. +// The window size can be overridden if required. +func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser { + return newZipReader(opts...) } diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index ef1d49a0..4be7cc73 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -9,7 +9,6 @@ import ( "errors" "log" "math" - "math/bits" ) // enable debug printing @@ -36,8 +35,8 @@ const forcePreDef = false // zstdMinMatch is the minimum zstd match length. const zstdMinMatch = 3 -// Reset the buffer offset when reaching this. -const bufferReset = math.MaxInt32 - MaxWindowSize +// fcsUnknown is used for unknown frame content size. +const fcsUnknown = math.MaxUint64 var ( // ErrReservedBlockType is returned when a reserved block type is found. @@ -52,6 +51,10 @@ var ( // Typically returned on invalid input. ErrBlockTooSmall = errors.New("block too small") + // ErrUnexpectedBlockSize is returned when a block has unexpected size. + // Typically returned on invalid input. + ErrUnexpectedBlockSize = errors.New("unexpected block size") + // ErrMagicMismatch is returned when a "magic" number isn't what is expected. // Typically this indicates wrong or corrupted input. ErrMagicMismatch = errors.New("invalid input: magic number mismatch") @@ -68,13 +71,16 @@ var ( ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit") // ErrUnknownDictionary is returned if the dictionary ID is unknown. - // For the time being dictionaries are not supported. ErrUnknownDictionary = errors.New("unknown dictionary") // ErrFrameSizeExceeded is returned if the stated frame size is exceeded. // This is only returned if SingleSegment is specified on the frame. ErrFrameSizeExceeded = errors.New("frame size exceeded") + // ErrFrameSizeMismatch is returned if the stated frame size does not match the expected size. + // This is only returned if SingleSegment is specified on the frame. + ErrFrameSizeMismatch = errors.New("frame size does not match size on stream") + // ErrCRCMismatch is returned if CRC mismatches. ErrCRCMismatch = errors.New("CRC check failed") @@ -99,49 +105,12 @@ func printf(format string, a ...interface{}) { } } -// matchLenFast does matching, but will not match the last up to 7 bytes. -func matchLenFast(a, b []byte) int { - endI := len(a) & (math.MaxInt32 - 7) - for i := 0; i < endI; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - return i + bits.TrailingZeros64(diff)>>3 - } - } - return endI -} - -// matchLen returns the maximum length. -// a must be the shortest of the two. -// The function also returns whether all bytes matched. -func matchLen(a, b []byte) int { - b = b[:len(a)] - for i := 0; i < len(a)-7; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - return i + (bits.TrailingZeros64(diff) >> 3) - } - } - - checked := (len(a) >> 3) << 3 - a = a[checked:] - b = b[checked:] - for i := range a { - if a[i] != b[i] { - return i + checked - } - } - return len(a) + checked -} - func load3232(b []byte, i int32) uint32 { - return binary.LittleEndian.Uint32(b[i:]) + return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:]) } func load6432(b []byte, i int32) uint64 { - return binary.LittleEndian.Uint64(b[i:]) -} - -func load64(b []byte, i int) uint64 { - return binary.LittleEndian.Uint64(b[i:]) + return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:]) } type byter interface { diff --git a/vendor/go.mongodb.org/mongo-driver/v2/LICENSE b/vendor/go.mongodb.org/mongo-driver/v2/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/array_codec.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/array_codec.go new file mode 100644 index 00000000..4642fb6e --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/array_codec.go @@ -0,0 +1,42 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "reflect" + + "go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore" +) + +// arrayCodec is the Codec used for bsoncore.Array values. +type arrayCodec struct{} + +// EncodeValue is the ValueEncoder for bsoncore.Array values. +func (ac *arrayCodec) EncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tCoreArray { + return ValueEncoderError{Name: "CoreArrayEncodeValue", Types: []reflect.Type{tCoreArray}, Received: val} + } + + arr := val.Interface().(bsoncore.Array) + return copyArrayFromBytes(vw, arr) +} + +// DecodeValue is the ValueDecoder for bsoncore.Array values. +func (ac *arrayCodec) DecodeValue(_ DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tCoreArray { + return ValueDecoderError{Name: "CoreArrayDecodeValue", Types: []reflect.Type{tCoreArray}, Received: val} + } + + if val.IsNil() { + val.Set(reflect.MakeSlice(val.Type(), 0, 0)) + } + + val.SetLen(0) + arr, err := appendArrayBytes(val.Interface().(bsoncore.Array), vr) + val.Set(reflect.ValueOf(arr)) + return err +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/bsoncodec.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/bsoncodec.go new file mode 100644 index 00000000..91592a3e --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/bsoncodec.go @@ -0,0 +1,203 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "fmt" + "reflect" + "strings" +) + +var ( + emptyValue = reflect.Value{} +) + +// ValueEncoderError is an error returned from a ValueEncoder when the provided value can't be +// encoded by the ValueEncoder. +type ValueEncoderError struct { + Name string + Types []reflect.Type + Kinds []reflect.Kind + Received reflect.Value +} + +func (vee ValueEncoderError) Error() string { + typeKinds := make([]string, 0, len(vee.Types)+len(vee.Kinds)) + for _, t := range vee.Types { + typeKinds = append(typeKinds, t.String()) + } + for _, k := range vee.Kinds { + if k == reflect.Map { + typeKinds = append(typeKinds, "map[string]*") + continue + } + typeKinds = append(typeKinds, k.String()) + } + received := vee.Received.Kind().String() + if vee.Received.IsValid() { + received = vee.Received.Type().String() + } + return fmt.Sprintf("%s can only encode valid %s, but got %s", vee.Name, strings.Join(typeKinds, ", "), received) +} + +// ValueDecoderError is an error returned from a ValueDecoder when the provided value can't be +// decoded by the ValueDecoder. +type ValueDecoderError struct { + Name string + Types []reflect.Type + Kinds []reflect.Kind + Received reflect.Value +} + +func (vde ValueDecoderError) Error() string { + typeKinds := make([]string, 0, len(vde.Types)+len(vde.Kinds)) + for _, t := range vde.Types { + typeKinds = append(typeKinds, t.String()) + } + for _, k := range vde.Kinds { + if k == reflect.Map { + typeKinds = append(typeKinds, "map[string]*") + continue + } + typeKinds = append(typeKinds, k.String()) + } + received := vde.Received.Kind().String() + if vde.Received.IsValid() { + received = vde.Received.Type().String() + } + if !vde.Received.CanSet() { + received = "unsettable " + received + } + return fmt.Sprintf("%s can only decode valid and settable %s, but got %s", vde.Name, strings.Join(typeKinds, ", "), received) +} + +// EncodeContext is the contextual information required for a Codec to encode a +// value. +type EncodeContext struct { + *Registry + + // minSize causes the Encoder to marshal Go integer values (int, int8, int16, int32, int64, + // uint, uint8, uint16, uint32, or uint64) as the minimum BSON int size (either 32 or 64 bits) + // that can represent the integer value. + minSize bool + + errorOnInlineDuplicates bool + stringifyMapKeysWithFmt bool + nilMapAsEmpty bool + nilSliceAsEmpty bool + nilByteSliceAsEmpty bool + omitZeroStruct bool + omitEmpty bool + useJSONStructTags bool +} + +// DecodeContext is the contextual information required for a Codec to decode a +// value. +type DecodeContext struct { + *Registry + + // truncate, if true, instructs decoders to to truncate the fractional part of BSON "double" + // values when attempting to unmarshal them into a Go integer (int, int8, int16, int32, int64, + // uint, uint8, uint16, uint32, or uint64) struct field. The truncation logic does not apply to + // BSON "decimal128" values. + truncate bool + + // defaultDocumentType specifies the Go type to decode top-level and nested BSON documents into. In particular, the + // usage for this field is restricted to data typed as "interface{}" or "map[string]interface{}". If DocumentType is + // set to a type that a BSON document cannot be unmarshaled into (e.g. "string"), unmarshalling will result in an + // error. + defaultDocumentType reflect.Type + + binaryAsSlice bool + + // a false value results in a decoding error. + objectIDAsHexString bool + + useJSONStructTags bool + useLocalTimeZone bool + zeroMaps bool + zeroStructs bool +} + +// ValueEncoder is the interface implemented by types that can encode a provided Go type to BSON. +// The value to encode is provided as a reflect.Value and a bson.ValueWriter is used within the +// EncodeValue method to actually create the BSON representation. For convenience, ValueEncoderFunc +// is provided to allow use of a function with the correct signature as a ValueEncoder. An +// EncodeContext instance is provided to allow implementations to lookup further ValueEncoders and +// to provide configuration information. +type ValueEncoder interface { + EncodeValue(EncodeContext, ValueWriter, reflect.Value) error +} + +// ValueEncoderFunc is an adapter function that allows a function with the correct signature to be +// used as a ValueEncoder. +type ValueEncoderFunc func(EncodeContext, ValueWriter, reflect.Value) error + +// EncodeValue implements the ValueEncoder interface. +func (fn ValueEncoderFunc) EncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + return fn(ec, vw, val) +} + +// ValueDecoder is the interface implemented by types that can decode BSON to a provided Go type. +// Implementations should ensure that the value they receive is settable. Similar to ValueEncoderFunc, +// ValueDecoderFunc is provided to allow the use of a function with the correct signature as a +// ValueDecoder. A DecodeContext instance is provided and serves similar functionality to the +// EncodeContext. +type ValueDecoder interface { + DecodeValue(DecodeContext, ValueReader, reflect.Value) error +} + +// ValueDecoderFunc is an adapter function that allows a function with the correct signature to be +// used as a ValueDecoder. +type ValueDecoderFunc func(DecodeContext, ValueReader, reflect.Value) error + +// DecodeValue implements the ValueDecoder interface. +func (fn ValueDecoderFunc) DecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + return fn(dc, vr, val) +} + +// typeDecoder is the interface implemented by types that can handle the decoding of a value given its type. +type typeDecoder interface { + decodeType(DecodeContext, ValueReader, reflect.Type) (reflect.Value, error) +} + +// typeDecoderFunc is an adapter function that allows a function with the correct signature to be used as a typeDecoder. +type typeDecoderFunc func(DecodeContext, ValueReader, reflect.Type) (reflect.Value, error) + +func (fn typeDecoderFunc) decodeType(dc DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + return fn(dc, vr, t) +} + +// decodeAdapter allows two functions with the correct signatures to be used as both a ValueDecoder and typeDecoder. +type decodeAdapter struct { + ValueDecoderFunc + typeDecoderFunc +} + +var _ ValueDecoder = decodeAdapter{} +var _ typeDecoder = decodeAdapter{} + +func decodeTypeOrValueWithInfo(vd ValueDecoder, dc DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if td, _ := vd.(typeDecoder); td != nil { + val, err := td.decodeType(dc, vr, t) + if err == nil && val.Type() != t { + // This conversion step is necessary for slices and maps. If a user declares variables like: + // + // type myBool bool + // var m map[string]myBool + // + // and tries to decode BSON bytes into the map, the decoding will fail if this conversion is not present + // because we'll try to assign a value of type bool to one of type myBool. + val = val.Convert(t) + } + return val, err + } + + val := reflect.New(t).Elem() + err := vd.DecodeValue(dc, vr, val) + return val, err +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/byte_slice_codec.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/byte_slice_codec.go new file mode 100644 index 00000000..bd44cf9a --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/byte_slice_codec.go @@ -0,0 +1,97 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "fmt" + "reflect" +) + +// byteSliceCodec is the Codec used for []byte values. +type byteSliceCodec struct { + // encodeNilAsEmpty causes EncodeValue to marshal nil Go byte slices as empty BSON binary values + // instead of BSON null. + encodeNilAsEmpty bool +} + +// Assert that byteSliceCodec satisfies the typeDecoder interface, which allows it to be +// used by collection type decoders (e.g. map, slice, etc) to set individual values in a +// collection. +var _ typeDecoder = &byteSliceCodec{} + +// EncodeValue is the ValueEncoder for []byte. +func (bsc *byteSliceCodec) EncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tByteSlice { + return ValueEncoderError{Name: "ByteSliceEncodeValue", Types: []reflect.Type{tByteSlice}, Received: val} + } + if val.IsNil() && !bsc.encodeNilAsEmpty && !ec.nilByteSliceAsEmpty { + return vw.WriteNull() + } + return vw.WriteBinary(val.Interface().([]byte)) +} + +func (bsc *byteSliceCodec) decodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tByteSlice { + return emptyValue, ValueDecoderError{ + Name: "ByteSliceDecodeValue", + Types: []reflect.Type{tByteSlice}, + Received: reflect.Zero(t), + } + } + + var data []byte + var err error + switch vrType := vr.Type(); vrType { + case TypeString: + str, err := vr.ReadString() + if err != nil { + return emptyValue, err + } + data = []byte(str) + case TypeSymbol: + sym, err := vr.ReadSymbol() + if err != nil { + return emptyValue, err + } + data = []byte(sym) + case TypeBinary: + var subtype byte + data, subtype, err = vr.ReadBinary() + if err != nil { + return emptyValue, err + } + if subtype != TypeBinaryGeneric && subtype != TypeBinaryBinaryOld { + return emptyValue, decodeBinaryError{subtype: subtype, typeName: "[]byte"} + } + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a []byte", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(data), nil +} + +// DecodeValue is the ValueDecoder for []byte. +func (bsc *byteSliceCodec) DecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tByteSlice { + return ValueDecoderError{Name: "ByteSliceDecodeValue", Types: []reflect.Type{tByteSlice}, Received: val} + } + + elem, err := bsc.decodeType(dc, vr, tByteSlice) + if err != nil { + return err + } + + val.Set(elem) + return nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/codec_cache.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/codec_cache.go new file mode 100644 index 00000000..b4042822 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/codec_cache.go @@ -0,0 +1,166 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "reflect" + "sync" + "sync/atomic" +) + +// Runtime check that the kind encoder and decoder caches can store any valid +// reflect.Kind constant. +func init() { + if s := reflect.Kind(len(kindEncoderCache{}.entries)).String(); s != "kind27" { + panic("The capacity of kindEncoderCache is too small.\n" + + "This is due to a new type being added to reflect.Kind.") + } +} + +// statically assert array size +var _ = (kindEncoderCache{}).entries[reflect.UnsafePointer] +var _ = (kindDecoderCache{}).entries[reflect.UnsafePointer] + +type typeEncoderCache struct { + cache sync.Map // map[reflect.Type]ValueEncoder +} + +func (c *typeEncoderCache) Store(rt reflect.Type, enc ValueEncoder) { + c.cache.Store(rt, enc) +} + +func (c *typeEncoderCache) Load(rt reflect.Type) (ValueEncoder, bool) { + if v, _ := c.cache.Load(rt); v != nil { + return v.(ValueEncoder), true + } + return nil, false +} + +func (c *typeEncoderCache) LoadOrStore(rt reflect.Type, enc ValueEncoder) ValueEncoder { + if v, loaded := c.cache.LoadOrStore(rt, enc); loaded { + enc = v.(ValueEncoder) + } + return enc +} + +func (c *typeEncoderCache) Clone() *typeEncoderCache { + cc := new(typeEncoderCache) + c.cache.Range(func(k, v interface{}) bool { + if k != nil && v != nil { + cc.cache.Store(k, v) + } + return true + }) + return cc +} + +type typeDecoderCache struct { + cache sync.Map // map[reflect.Type]ValueDecoder +} + +func (c *typeDecoderCache) Store(rt reflect.Type, dec ValueDecoder) { + c.cache.Store(rt, dec) +} + +func (c *typeDecoderCache) Load(rt reflect.Type) (ValueDecoder, bool) { + if v, _ := c.cache.Load(rt); v != nil { + return v.(ValueDecoder), true + } + return nil, false +} + +func (c *typeDecoderCache) LoadOrStore(rt reflect.Type, dec ValueDecoder) ValueDecoder { + if v, loaded := c.cache.LoadOrStore(rt, dec); loaded { + dec = v.(ValueDecoder) + } + return dec +} + +func (c *typeDecoderCache) Clone() *typeDecoderCache { + cc := new(typeDecoderCache) + c.cache.Range(func(k, v interface{}) bool { + if k != nil && v != nil { + cc.cache.Store(k, v) + } + return true + }) + return cc +} + +// atomic.Value requires that all calls to Store() have the same concrete type +// so we wrap the ValueEncoder with a kindEncoderCacheEntry to ensure the type +// is always the same (since different concrete types may implement the +// ValueEncoder interface). +type kindEncoderCacheEntry struct { + enc ValueEncoder +} + +type kindEncoderCache struct { + entries [reflect.UnsafePointer + 1]atomic.Value // *kindEncoderCacheEntry +} + +func (c *kindEncoderCache) Store(rt reflect.Kind, enc ValueEncoder) { + if enc != nil && rt < reflect.Kind(len(c.entries)) { + c.entries[rt].Store(&kindEncoderCacheEntry{enc: enc}) + } +} + +func (c *kindEncoderCache) Load(rt reflect.Kind) (ValueEncoder, bool) { + if rt < reflect.Kind(len(c.entries)) { + if ent, ok := c.entries[rt].Load().(*kindEncoderCacheEntry); ok { + return ent.enc, ent.enc != nil + } + } + return nil, false +} + +func (c *kindEncoderCache) Clone() *kindEncoderCache { + cc := new(kindEncoderCache) + for i, v := range c.entries { + if val := v.Load(); val != nil { + cc.entries[i].Store(val) + } + } + return cc +} + +// atomic.Value requires that all calls to Store() have the same concrete type +// so we wrap the ValueDecoder with a kindDecoderCacheEntry to ensure the type +// is always the same (since different concrete types may implement the +// ValueDecoder interface). +type kindDecoderCacheEntry struct { + dec ValueDecoder +} + +type kindDecoderCache struct { + entries [reflect.UnsafePointer + 1]atomic.Value // *kindDecoderCacheEntry +} + +func (c *kindDecoderCache) Store(rt reflect.Kind, dec ValueDecoder) { + if rt < reflect.Kind(len(c.entries)) { + c.entries[rt].Store(&kindDecoderCacheEntry{dec: dec}) + } +} + +func (c *kindDecoderCache) Load(rt reflect.Kind) (ValueDecoder, bool) { + if rt < reflect.Kind(len(c.entries)) { + if ent, ok := c.entries[rt].Load().(*kindDecoderCacheEntry); ok { + return ent.dec, ent.dec != nil + } + } + return nil, false +} + +func (c *kindDecoderCache) Clone() *kindDecoderCache { + cc := new(kindDecoderCache) + for i, v := range c.entries { + if val := v.Load(); val != nil { + cc.entries[i].Store(val) + } + } + return cc +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/cond_addr_codec.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/cond_addr_codec.go new file mode 100644 index 00000000..fed4d1f8 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/cond_addr_codec.go @@ -0,0 +1,61 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "reflect" +) + +// condAddrEncoder is the encoder used when a pointer to the encoding value has an encoder. +type condAddrEncoder struct { + canAddrEnc ValueEncoder + elseEnc ValueEncoder +} + +var _ ValueEncoder = &condAddrEncoder{} + +// newCondAddrEncoder returns an condAddrEncoder. +func newCondAddrEncoder(canAddrEnc, elseEnc ValueEncoder) *condAddrEncoder { + encoder := condAddrEncoder{canAddrEnc: canAddrEnc, elseEnc: elseEnc} + return &encoder +} + +// EncodeValue is the ValueEncoderFunc for a value that may be addressable. +func (cae *condAddrEncoder) EncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + if val.CanAddr() { + return cae.canAddrEnc.EncodeValue(ec, vw, val) + } + if cae.elseEnc != nil { + return cae.elseEnc.EncodeValue(ec, vw, val) + } + return errNoEncoder{Type: val.Type()} +} + +// condAddrDecoder is the decoder used when a pointer to the value has a decoder. +type condAddrDecoder struct { + canAddrDec ValueDecoder + elseDec ValueDecoder +} + +var _ ValueDecoder = &condAddrDecoder{} + +// newCondAddrDecoder returns an CondAddrDecoder. +func newCondAddrDecoder(canAddrDec, elseDec ValueDecoder) *condAddrDecoder { + decoder := condAddrDecoder{canAddrDec: canAddrDec, elseDec: elseDec} + return &decoder +} + +// DecodeValue is the ValueDecoderFunc for a value that may be addressable. +func (cad *condAddrDecoder) DecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if val.CanAddr() { + return cad.canAddrDec.DecodeValue(dc, vr, val) + } + if cad.elseDec != nil { + return cad.elseDec.DecodeValue(dc, vr, val) + } + return errNoDecoder{Type: val.Type()} +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/copier.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/copier.go new file mode 100644 index 00000000..6279f072 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/copier.go @@ -0,0 +1,431 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "bytes" + "errors" + "fmt" + "io" + + "go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore" +) + +// copyDocument handles copying one document from the src to the dst. +func copyDocument(dst ValueWriter, src ValueReader) error { + dr, err := src.ReadDocument() + if err != nil { + return err + } + + dw, err := dst.WriteDocument() + if err != nil { + return err + } + + return copyDocumentCore(dw, dr) +} + +// copyArrayFromBytes copies the values from a BSON array represented as a +// []byte to a ValueWriter. +func copyArrayFromBytes(dst ValueWriter, src []byte) error { + aw, err := dst.WriteArray() + if err != nil { + return err + } + + err = copyBytesToArrayWriter(aw, src) + if err != nil { + return err + } + + return aw.WriteArrayEnd() +} + +// copyDocumentFromBytes copies the values from a BSON document represented as a +// []byte to a ValueWriter. +func copyDocumentFromBytes(dst ValueWriter, src []byte) error { + dw, err := dst.WriteDocument() + if err != nil { + return err + } + + err = copyBytesToDocumentWriter(dw, src) + if err != nil { + return err + } + + return dw.WriteDocumentEnd() +} + +type writeElementFn func(key string) (ValueWriter, error) + +// copyBytesToArrayWriter copies the values from a BSON Array represented as a []byte to an +// ArrayWriter. +func copyBytesToArrayWriter(dst ArrayWriter, src []byte) error { + wef := func(_ string) (ValueWriter, error) { + return dst.WriteArrayElement() + } + + return copyBytesToValueWriter(src, wef) +} + +// copyBytesToDocumentWriter copies the values from a BSON document represented as a []byte to a +// DocumentWriter. +func copyBytesToDocumentWriter(dst DocumentWriter, src []byte) error { + wef := func(key string) (ValueWriter, error) { + return dst.WriteDocumentElement(key) + } + + return copyBytesToValueWriter(src, wef) +} + +func copyBytesToValueWriter(src []byte, wef writeElementFn) error { + // TODO(skriptble): Create errors types here. Anything that is a tag should be a property. + length, rem, ok := bsoncore.ReadLength(src) + if !ok { + return fmt.Errorf("couldn't read length from src, not enough bytes. length=%d", len(src)) + } + if len(src) < int(length) { + return fmt.Errorf("length read exceeds number of bytes available. length=%d bytes=%d", len(src), length) + } + rem = rem[:length-4] + + var t bsoncore.Type + var key string + var val bsoncore.Value + for { + t, rem, ok = bsoncore.ReadType(rem) + if !ok { + return io.EOF + } + if t == bsoncore.Type(0) { + if len(rem) != 0 { + return fmt.Errorf("document end byte found before end of document. remaining bytes=%v", rem) + } + break + } + + key, rem, ok = bsoncore.ReadKey(rem) + if !ok { + return fmt.Errorf("invalid key found. remaining bytes=%v", rem) + } + + // write as either array element or document element using writeElementFn + vw, err := wef(key) + if err != nil { + return err + } + + val, rem, ok = bsoncore.ReadValue(rem, t) + if !ok { + return fmt.Errorf("not enough bytes available to read type. bytes=%d type=%s", len(rem), t) + } + err = copyValueFromBytes(vw, Type(t), val.Data) + if err != nil { + return err + } + } + return nil +} + +// copyDocumentToBytes copies an entire document from the ValueReader and +// returns it as bytes. +func copyDocumentToBytes(src ValueReader) ([]byte, error) { + return appendDocumentBytes(nil, src) +} + +// appendDocumentBytes functions the same as CopyDocumentToBytes, but will +// append the result to dst. +func appendDocumentBytes(dst []byte, src ValueReader) ([]byte, error) { + if br, ok := src.(bytesReader); ok { + _, dst, err := br.readValueBytes(dst) + return dst, err + } + + vw := vwPool.Get().(*valueWriter) + defer putValueWriter(vw) + + vw.reset(dst) + + err := copyDocument(vw, src) + dst = vw.buf + return dst, err +} + +// appendArrayBytes copies an array from the ValueReader to dst. +func appendArrayBytes(dst []byte, src ValueReader) ([]byte, error) { + if br, ok := src.(bytesReader); ok { + _, dst, err := br.readValueBytes(dst) + return dst, err + } + + vw := vwPool.Get().(*valueWriter) + defer putValueWriter(vw) + + vw.reset(dst) + + err := copyArray(vw, src) + dst = vw.buf + return dst, err +} + +// copyValueFromBytes will write the value represtend by t and src to dst. +func copyValueFromBytes(dst ValueWriter, t Type, src []byte) error { + if wvb, ok := dst.(bytesWriter); ok { + return wvb.writeValueBytes(t, src) + } + + vr := newDocumentReader(bytes.NewReader(src)) + vr.pushElement(t) + + return copyValue(dst, vr) +} + +// copyValueToBytes copies a value from src and returns it as a Type and a +// []byte. +func copyValueToBytes(src ValueReader) (Type, []byte, error) { + if br, ok := src.(bytesReader); ok { + return br.readValueBytes(nil) + } + + vw := vwPool.Get().(*valueWriter) + defer putValueWriter(vw) + + vw.reset(nil) + vw.push(mElement) + + err := copyValue(vw, src) + if err != nil { + return 0, nil, err + } + + return Type(vw.buf[0]), vw.buf[2:], nil +} + +// copyValue will copy a single value from src to dst. +func copyValue(dst ValueWriter, src ValueReader) error { + var err error + switch src.Type() { + case TypeDouble: + var f64 float64 + f64, err = src.ReadDouble() + if err != nil { + break + } + err = dst.WriteDouble(f64) + case TypeString: + var str string + str, err = src.ReadString() + if err != nil { + return err + } + err = dst.WriteString(str) + case TypeEmbeddedDocument: + err = copyDocument(dst, src) + case TypeArray: + err = copyArray(dst, src) + case TypeBinary: + var data []byte + var subtype byte + data, subtype, err = src.ReadBinary() + if err != nil { + break + } + err = dst.WriteBinaryWithSubtype(data, subtype) + case TypeUndefined: + err = src.ReadUndefined() + if err != nil { + break + } + err = dst.WriteUndefined() + case TypeObjectID: + var oid ObjectID + oid, err = src.ReadObjectID() + if err != nil { + break + } + err = dst.WriteObjectID(oid) + case TypeBoolean: + var b bool + b, err = src.ReadBoolean() + if err != nil { + break + } + err = dst.WriteBoolean(b) + case TypeDateTime: + var dt int64 + dt, err = src.ReadDateTime() + if err != nil { + break + } + err = dst.WriteDateTime(dt) + case TypeNull: + err = src.ReadNull() + if err != nil { + break + } + err = dst.WriteNull() + case TypeRegex: + var pattern, options string + pattern, options, err = src.ReadRegex() + if err != nil { + break + } + err = dst.WriteRegex(pattern, options) + case TypeDBPointer: + var ns string + var pointer ObjectID + ns, pointer, err = src.ReadDBPointer() + if err != nil { + break + } + err = dst.WriteDBPointer(ns, pointer) + case TypeJavaScript: + var js string + js, err = src.ReadJavascript() + if err != nil { + break + } + err = dst.WriteJavascript(js) + case TypeSymbol: + var symbol string + symbol, err = src.ReadSymbol() + if err != nil { + break + } + err = dst.WriteSymbol(symbol) + case TypeCodeWithScope: + var code string + var srcScope DocumentReader + code, srcScope, err = src.ReadCodeWithScope() + if err != nil { + break + } + + var dstScope DocumentWriter + dstScope, err = dst.WriteCodeWithScope(code) + if err != nil { + break + } + err = copyDocumentCore(dstScope, srcScope) + case TypeInt32: + var i32 int32 + i32, err = src.ReadInt32() + if err != nil { + break + } + err = dst.WriteInt32(i32) + case TypeTimestamp: + var t, i uint32 + t, i, err = src.ReadTimestamp() + if err != nil { + break + } + err = dst.WriteTimestamp(t, i) + case TypeInt64: + var i64 int64 + i64, err = src.ReadInt64() + if err != nil { + break + } + err = dst.WriteInt64(i64) + case TypeDecimal128: + var d128 Decimal128 + d128, err = src.ReadDecimal128() + if err != nil { + break + } + err = dst.WriteDecimal128(d128) + case TypeMinKey: + err = src.ReadMinKey() + if err != nil { + break + } + err = dst.WriteMinKey() + case TypeMaxKey: + err = src.ReadMaxKey() + if err != nil { + break + } + err = dst.WriteMaxKey() + default: + err = fmt.Errorf("cannot copy unknown BSON type %s", src.Type()) + } + + return err +} + +func copyArray(dst ValueWriter, src ValueReader) error { + ar, err := src.ReadArray() + if err != nil { + return err + } + + aw, err := dst.WriteArray() + if err != nil { + return err + } + + for { + vr, err := ar.ReadValue() + if errors.Is(err, ErrEOA) { + break + } + if err != nil { + return err + } + + vw, err := aw.WriteArrayElement() + if err != nil { + return err + } + + err = copyValue(vw, vr) + if err != nil { + return err + } + } + + return aw.WriteArrayEnd() +} + +func copyDocumentCore(dw DocumentWriter, dr DocumentReader) error { + for { + key, vr, err := dr.ReadElement() + if errors.Is(err, ErrEOD) { + break + } + if err != nil { + return err + } + + vw, err := dw.WriteDocumentElement(key) + if err != nil { + return err + } + + err = copyValue(vw, vr) + if err != nil { + return err + } + } + + return dw.WriteDocumentEnd() +} + +// bytesReader is the interface used to read BSON bytes from a valueReader. +// +// The bytes of the value will be appended to dst. +type bytesReader interface { + readValueBytes(dst []byte) (Type, []byte, error) +} + +// bytesWriter is the interface used to write BSON bytes to a valueWriter. +type bytesWriter interface { + writeValueBytes(t Type, b []byte) error +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/decimal.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/decimal.go new file mode 100644 index 00000000..85c506ef --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/decimal.go @@ -0,0 +1,339 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +// +// Based on gopkg.in/mgo.v2/bson by Gustavo Niemeyer +// See THIRD-PARTY-NOTICES for original license terms. + +package bson + +import ( + "encoding/json" + "errors" + "fmt" + "math/big" + "regexp" + "strconv" + "strings" + + "go.mongodb.org/mongo-driver/v2/internal/decimal128" +) + +// These constants are the maximum and minimum values for the exponent field in a decimal128 value. +const ( + MaxDecimal128Exp = 6111 + MinDecimal128Exp = -6176 +) + +// These errors are returned when an invalid value is parsed as a big.Int. +var ( + ErrParseNaN = errors.New("cannot parse NaN as a *big.Int") + ErrParseInf = errors.New("cannot parse Infinity as a *big.Int") + ErrParseNegInf = errors.New("cannot parse -Infinity as a *big.Int") +) + +// Decimal128 holds decimal128 BSON values. +type Decimal128 struct { + h, l uint64 +} + +// NewDecimal128 creates a Decimal128 using the provide high and low uint64s. +func NewDecimal128(h, l uint64) Decimal128 { + return Decimal128{h: h, l: l} +} + +// GetBytes returns the underlying bytes of the BSON decimal value as two uint64 values. The first +// contains the most first 8 bytes of the value and the second contains the latter. +func (d Decimal128) GetBytes() (uint64, uint64) { + return d.h, d.l +} + +// String returns a string representation of the decimal value. +func (d Decimal128) String() string { + return decimal128.String(d.h, d.l) +} + +// BigInt returns significand as big.Int and exponent, bi * 10 ^ exp. +func (d Decimal128) BigInt() (*big.Int, int, error) { + high, low := d.GetBytes() + posSign := high>>63&1 == 0 // positive sign + + switch high >> 58 & (1<<5 - 1) { + case 0x1F: + return nil, 0, ErrParseNaN + case 0x1E: + if posSign { + return nil, 0, ErrParseInf + } + return nil, 0, ErrParseNegInf + } + + var exp int + if high>>61&3 == 3 { + // Bits: 1*sign 2*ignored 14*exponent 111*significand. + // Implicit 0b100 prefix in significand. + exp = int(high >> 47 & (1<<14 - 1)) + // Spec says all of these values are out of range. + high, low = 0, 0 + } else { + // Bits: 1*sign 14*exponent 113*significand + exp = int(high >> 49 & (1<<14 - 1)) + high &= (1<<49 - 1) + } + exp += MinDecimal128Exp + + // Would be handled by the logic below, but that's trivial and common. + if high == 0 && low == 0 && exp == 0 { + return new(big.Int), 0, nil + } + + bi := big.NewInt(0) + const host32bit = ^uint(0)>>32 == 0 + if host32bit { + bi.SetBits([]big.Word{big.Word(low), big.Word(low >> 32), big.Word(high), big.Word(high >> 32)}) + } else { + bi.SetBits([]big.Word{big.Word(low), big.Word(high)}) + } + + if !posSign { + return bi.Neg(bi), exp, nil + } + return bi, exp, nil +} + +// IsNaN returns whether d is NaN. +func (d Decimal128) IsNaN() bool { + return d.h>>58&(1<<5-1) == 0x1F +} + +// IsInf returns: +// +// +1 d == Infinity +// 0 other case +// -1 d == -Infinity +func (d Decimal128) IsInf() int { + if d.h>>58&(1<<5-1) != 0x1E { + return 0 + } + + if d.h>>63&1 == 0 { + return 1 + } + return -1 +} + +// IsZero returns true if d is the empty Decimal128. +func (d Decimal128) IsZero() bool { + return d.h == 0 && d.l == 0 +} + +// MarshalJSON returns Decimal128 as a string. +func (d Decimal128) MarshalJSON() ([]byte, error) { + return json.Marshal(d.String()) +} + +// UnmarshalJSON creates a Decimal128 from a JSON string, an extended JSON $numberDecimal value, or the string +// "null". If b is a JSON string or extended JSON value, d will have the value of that string, and if b is "null", d will +// be unchanged. +func (d *Decimal128) UnmarshalJSON(b []byte) error { + // Ignore "null" to keep parity with the standard library. Decoding a JSON null into a non-pointer Decimal128 field + // will leave the field unchanged. For pointer values, encoding/json will set the pointer to nil and will not + // enter the UnmarshalJSON hook. + if string(b) == "null" { + return nil + } + + var res interface{} + err := json.Unmarshal(b, &res) + if err != nil { + return err + } + str, ok := res.(string) + + // Extended JSON + if !ok { + m, ok := res.(map[string]interface{}) + if !ok { + return errors.New("not an extended JSON Decimal128: expected document") + } + d128, ok := m["$numberDecimal"] + if !ok { + return errors.New("not an extended JSON Decimal128: expected key $numberDecimal") + } + str, ok = d128.(string) + if !ok { + return errors.New("not an extended JSON Decimal128: expected decimal to be string") + } + } + + *d, err = ParseDecimal128(str) + return err +} + +var dNaN = Decimal128{0x1F << 58, 0} +var dPosInf = Decimal128{0x1E << 58, 0} +var dNegInf = Decimal128{0x3E << 58, 0} + +func dErr(s string) (Decimal128, error) { + return dNaN, fmt.Errorf("cannot parse %q as a decimal128", s) +} + +// match scientific notation number, example -10.15e-18 +var normalNumber = regexp.MustCompile(`^(?P[-+]?\d*)?(?:\.(?P\d*))?(?:[Ee](?P[-+]?\d+))?$`) + +// ParseDecimal128 takes the given string and attempts to parse it into a valid +// Decimal128 value. +func ParseDecimal128(s string) (Decimal128, error) { + if s == "" { + return dErr(s) + } + + matches := normalNumber.FindStringSubmatch(s) + if len(matches) == 0 { + orig := s + neg := s[0] == '-' + if neg || s[0] == '+' { + s = s[1:] + } + + if s == "NaN" || s == "nan" || strings.EqualFold(s, "nan") { + return dNaN, nil + } + if s == "Inf" || s == "inf" || strings.EqualFold(s, "inf") || strings.EqualFold(s, "infinity") { + if neg { + return dNegInf, nil + } + return dPosInf, nil + } + return dErr(orig) + } + + intPart := matches[1] + decPart := matches[2] + expPart := matches[3] + + var err error + exp := 0 + if expPart != "" { + exp, err = strconv.Atoi(expPart) + if err != nil { + return dErr(s) + } + } + if decPart != "" { + exp -= len(decPart) + } + + if len(strings.Trim(intPart+decPart, "-0")) > 35 { + return dErr(s) + } + + // Parse the significand (i.e. the non-exponent part) as a big.Int. + bi, ok := new(big.Int).SetString(intPart+decPart, 10) + if !ok { + return dErr(s) + } + + d, ok := ParseDecimal128FromBigInt(bi, exp) + if !ok { + return dErr(s) + } + + if bi.Sign() == 0 && s[0] == '-' { + d.h |= 1 << 63 + } + + return d, nil +} + +var ( + ten = big.NewInt(10) + zero = new(big.Int) + + maxS, _ = new(big.Int).SetString("9999999999999999999999999999999999", 10) +) + +// ParseDecimal128FromBigInt attempts to parse the given significand and exponent into a valid Decimal128 value. +func ParseDecimal128FromBigInt(bi *big.Int, exp int) (Decimal128, bool) { + // copy + bi = new(big.Int).Set(bi) + + q := new(big.Int) + r := new(big.Int) + + // If the significand is zero, the logical value will always be zero, independent of the + // exponent. However, the loops for handling out-of-range exponent values below may be extremely + // slow for zero values because the significand never changes. Limit the exponent value to the + // supported range here to prevent entering the loops below. + if bi.Cmp(zero) == 0 { + if exp > MaxDecimal128Exp { + exp = MaxDecimal128Exp + } + if exp < MinDecimal128Exp { + exp = MinDecimal128Exp + } + } + + for bigIntCmpAbs(bi, maxS) == 1 { + bi, _ = q.QuoRem(bi, ten, r) + if r.Cmp(zero) != 0 { + return Decimal128{}, false + } + exp++ + if exp > MaxDecimal128Exp { + return Decimal128{}, false + } + } + + for exp < MinDecimal128Exp { + // Subnormal. + bi, _ = q.QuoRem(bi, ten, r) + if r.Cmp(zero) != 0 { + return Decimal128{}, false + } + exp++ + } + for exp > MaxDecimal128Exp { + // Clamped. + bi.Mul(bi, ten) + if bigIntCmpAbs(bi, maxS) == 1 { + return Decimal128{}, false + } + exp-- + } + + b := bi.Bytes() + var h, l uint64 + for i := 0; i < len(b); i++ { + if i < len(b)-8 { + h = h<<8 | uint64(b[i]) + continue + } + l = l<<8 | uint64(b[i]) + } + + h |= uint64(exp-MinDecimal128Exp) & uint64(1<<14-1) << 49 + if bi.Sign() == -1 { + h |= 1 << 63 + } + + return Decimal128{h: h, l: l}, true +} + +// bigIntCmpAbs computes big.Int.Cmp(absoluteValue(x), absoluteValue(y)). +func bigIntCmpAbs(x, y *big.Int) int { + xAbs := bigIntAbsValue(x) + yAbs := bigIntAbsValue(y) + return xAbs.Cmp(yAbs) +} + +// bigIntAbsValue returns a big.Int containing the absolute value of b. +// If b is already a non-negative number, it is returned without any changes or copies. +func bigIntAbsValue(b *big.Int) *big.Int { + if b.Sign() >= 0 { + return b // already positive + } + return new(big.Int).Abs(b) +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/decoder.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/decoder.go new file mode 100644 index 00000000..2fa9e6f1 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/decoder.go @@ -0,0 +1,136 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "errors" + "fmt" + "reflect" + "sync" +) + +// ErrDecodeToNil is the error returned when trying to decode to a nil value +var ErrDecodeToNil = errors.New("cannot Decode to nil value") + +// This pool is used to keep the allocations of Decoders down. This is only used for the Marshal* +// methods and is not consumable from outside of this package. The Decoders retrieved from this pool +// must have both Reset and SetRegistry called on them. +var decPool = sync.Pool{ + New: func() interface{} { + return new(Decoder) + }, +} + +// A Decoder reads and decodes BSON documents from a stream. It reads from a ValueReader as +// the source of BSON data. +type Decoder struct { + dc DecodeContext + vr ValueReader +} + +// NewDecoder returns a new decoder that reads from vr. +func NewDecoder(vr ValueReader) *Decoder { + return &Decoder{ + dc: DecodeContext{Registry: defaultRegistry}, + vr: vr, + } +} + +// Decode reads the next BSON document from the stream and decodes it into the +// value pointed to by val. +// +// See [Unmarshal] for details about BSON unmarshaling behavior. +func (d *Decoder) Decode(val interface{}) error { + if unmarshaler, ok := val.(Unmarshaler); ok { + // TODO(skriptble): Reuse a []byte here and use the AppendDocumentBytes method. + buf, err := copyDocumentToBytes(d.vr) + if err != nil { + return err + } + return unmarshaler.UnmarshalBSON(buf) + } + + rval := reflect.ValueOf(val) + switch rval.Kind() { + case reflect.Ptr: + if rval.IsNil() { + return ErrDecodeToNil + } + rval = rval.Elem() + case reflect.Map: + if rval.IsNil() { + return ErrDecodeToNil + } + default: + return fmt.Errorf("argument to Decode must be a pointer or a map, but got %v", rval) + } + decoder, err := d.dc.LookupDecoder(rval.Type()) + if err != nil { + return err + } + + return decoder.DecodeValue(d.dc, d.vr, rval) +} + +// Reset will reset the state of the decoder, using the same *DecodeContext used in +// the original construction but using vr for reading. +func (d *Decoder) Reset(vr ValueReader) { + d.vr = vr +} + +// SetRegistry replaces the current registry of the decoder with r. +func (d *Decoder) SetRegistry(r *Registry) { + d.dc.Registry = r +} + +// DefaultDocumentM causes the Decoder to always unmarshal documents into the bson.M type. This +// behavior is restricted to data typed as "interface{}" or "map[string]interface{}". +func (d *Decoder) DefaultDocumentM() { + d.dc.defaultDocumentType = reflect.TypeOf(M{}) +} + +// AllowTruncatingDoubles causes the Decoder to truncate the fractional part of BSON "double" values +// when attempting to unmarshal them into a Go integer (int, int8, int16, int32, or int64) struct +// field. The truncation logic does not apply to BSON "decimal128" values. +func (d *Decoder) AllowTruncatingDoubles() { + d.dc.truncate = true +} + +// BinaryAsSlice causes the Decoder to unmarshal BSON binary field values that are the "Generic" or +// "Old" BSON binary subtype as a Go byte slice instead of a bson.Binary. +func (d *Decoder) BinaryAsSlice() { + d.dc.binaryAsSlice = true +} + +// ObjectIDAsHexString causes the Decoder to decode object IDs to their hex representation. +func (d *Decoder) ObjectIDAsHexString() { + d.dc.objectIDAsHexString = true +} + +// UseJSONStructTags causes the Decoder to fall back to using the "json" struct tag if a "bson" +// struct tag is not specified. +func (d *Decoder) UseJSONStructTags() { + d.dc.useJSONStructTags = true +} + +// UseLocalTimeZone causes the Decoder to unmarshal time.Time values in the local timezone instead +// of the UTC timezone. +func (d *Decoder) UseLocalTimeZone() { + d.dc.useLocalTimeZone = true +} + +// ZeroMaps causes the Decoder to delete any existing values from Go maps in the destination value +// passed to Decode before unmarshaling BSON documents into them. +func (d *Decoder) ZeroMaps() { + d.dc.zeroMaps = true +} + +// ZeroStructs causes the Decoder to delete any existing values from Go structs in the destination +// value passed to Decode before unmarshaling BSON documents into them. +func (d *Decoder) ZeroStructs() { + d.dc.zeroStructs = true +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_decoders.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_decoders.go new file mode 100644 index 00000000..b7d20339 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_decoders.go @@ -0,0 +1,1518 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "encoding/json" + "errors" + "fmt" + "math" + "net/url" + "reflect" + "strconv" + + "go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore" +) + +var errCannotTruncate = errors.New("float64 can only be truncated to a lower precision type when truncation is enabled") + +type decodeBinaryError struct { + subtype byte + typeName string +} + +func (d decodeBinaryError) Error() string { + return fmt.Sprintf("only binary values with subtype 0x00 or 0x02 can be decoded into %s, but got subtype %v", d.typeName, d.subtype) +} + +// registerDefaultDecoders will register the decoder methods attached to DefaultValueDecoders with +// the provided RegistryBuilder. +// +// There is no support for decoding map[string]interface{} because there is no decoder for +// interface{}, so users must either register this decoder themselves or use the +// EmptyInterfaceDecoder available in the bson package. +func registerDefaultDecoders(reg *Registry) { + intDecoder := decodeAdapter{intDecodeValue, intDecodeType} + floatDecoder := decodeAdapter{floatDecodeValue, floatDecodeType} + uintCodec := &uintCodec{} + + reg.RegisterTypeDecoder(tD, ValueDecoderFunc(dDecodeValue)) + reg.RegisterTypeDecoder(tBinary, decodeAdapter{binaryDecodeValue, binaryDecodeType}) + reg.RegisterTypeDecoder(tVector, decodeAdapter{vectorDecodeValue, vectorDecodeType}) + reg.RegisterTypeDecoder(tUndefined, decodeAdapter{undefinedDecodeValue, undefinedDecodeType}) + reg.RegisterTypeDecoder(tDateTime, decodeAdapter{dateTimeDecodeValue, dateTimeDecodeType}) + reg.RegisterTypeDecoder(tNull, decodeAdapter{nullDecodeValue, nullDecodeType}) + reg.RegisterTypeDecoder(tRegex, decodeAdapter{regexDecodeValue, regexDecodeType}) + reg.RegisterTypeDecoder(tDBPointer, decodeAdapter{dbPointerDecodeValue, dbPointerDecodeType}) + reg.RegisterTypeDecoder(tTimestamp, decodeAdapter{timestampDecodeValue, timestampDecodeType}) + reg.RegisterTypeDecoder(tMinKey, decodeAdapter{minKeyDecodeValue, minKeyDecodeType}) + reg.RegisterTypeDecoder(tMaxKey, decodeAdapter{maxKeyDecodeValue, maxKeyDecodeType}) + reg.RegisterTypeDecoder(tJavaScript, decodeAdapter{javaScriptDecodeValue, javaScriptDecodeType}) + reg.RegisterTypeDecoder(tSymbol, decodeAdapter{symbolDecodeValue, symbolDecodeType}) + reg.RegisterTypeDecoder(tByteSlice, &byteSliceCodec{}) + reg.RegisterTypeDecoder(tTime, &timeCodec{}) + reg.RegisterTypeDecoder(tEmpty, &emptyInterfaceCodec{}) + reg.RegisterTypeDecoder(tCoreArray, &arrayCodec{}) + reg.RegisterTypeDecoder(tOID, decodeAdapter{objectIDDecodeValue, objectIDDecodeType}) + reg.RegisterTypeDecoder(tDecimal, decodeAdapter{decimal128DecodeValue, decimal128DecodeType}) + reg.RegisterTypeDecoder(tJSONNumber, decodeAdapter{jsonNumberDecodeValue, jsonNumberDecodeType}) + reg.RegisterTypeDecoder(tURL, decodeAdapter{urlDecodeValue, urlDecodeType}) + reg.RegisterTypeDecoder(tCoreDocument, ValueDecoderFunc(coreDocumentDecodeValue)) + reg.RegisterTypeDecoder(tCodeWithScope, decodeAdapter{codeWithScopeDecodeValue, codeWithScopeDecodeType}) + reg.RegisterKindDecoder(reflect.Bool, decodeAdapter{booleanDecodeValue, booleanDecodeType}) + reg.RegisterKindDecoder(reflect.Int, intDecoder) + reg.RegisterKindDecoder(reflect.Int8, intDecoder) + reg.RegisterKindDecoder(reflect.Int16, intDecoder) + reg.RegisterKindDecoder(reflect.Int32, intDecoder) + reg.RegisterKindDecoder(reflect.Int64, intDecoder) + reg.RegisterKindDecoder(reflect.Uint, uintCodec) + reg.RegisterKindDecoder(reflect.Uint8, uintCodec) + reg.RegisterKindDecoder(reflect.Uint16, uintCodec) + reg.RegisterKindDecoder(reflect.Uint32, uintCodec) + reg.RegisterKindDecoder(reflect.Uint64, uintCodec) + reg.RegisterKindDecoder(reflect.Float32, floatDecoder) + reg.RegisterKindDecoder(reflect.Float64, floatDecoder) + reg.RegisterKindDecoder(reflect.Array, ValueDecoderFunc(arrayDecodeValue)) + reg.RegisterKindDecoder(reflect.Map, &mapCodec{}) + reg.RegisterKindDecoder(reflect.Slice, &sliceCodec{}) + reg.RegisterKindDecoder(reflect.String, &stringCodec{}) + reg.RegisterKindDecoder(reflect.Struct, newStructCodec(nil)) + reg.RegisterKindDecoder(reflect.Ptr, &pointerCodec{}) + reg.RegisterTypeMapEntry(TypeDouble, tFloat64) + reg.RegisterTypeMapEntry(TypeString, tString) + reg.RegisterTypeMapEntry(TypeArray, tA) + reg.RegisterTypeMapEntry(TypeBinary, tBinary) + reg.RegisterTypeMapEntry(TypeUndefined, tUndefined) + reg.RegisterTypeMapEntry(TypeObjectID, tOID) + reg.RegisterTypeMapEntry(TypeBoolean, tBool) + reg.RegisterTypeMapEntry(TypeDateTime, tDateTime) + reg.RegisterTypeMapEntry(TypeRegex, tRegex) + reg.RegisterTypeMapEntry(TypeDBPointer, tDBPointer) + reg.RegisterTypeMapEntry(TypeJavaScript, tJavaScript) + reg.RegisterTypeMapEntry(TypeSymbol, tSymbol) + reg.RegisterTypeMapEntry(TypeCodeWithScope, tCodeWithScope) + reg.RegisterTypeMapEntry(TypeInt32, tInt32) + reg.RegisterTypeMapEntry(TypeInt64, tInt64) + reg.RegisterTypeMapEntry(TypeTimestamp, tTimestamp) + reg.RegisterTypeMapEntry(TypeDecimal128, tDecimal) + reg.RegisterTypeMapEntry(TypeMinKey, tMinKey) + reg.RegisterTypeMapEntry(TypeMaxKey, tMaxKey) + reg.RegisterTypeMapEntry(Type(0), tD) + reg.RegisterTypeMapEntry(TypeEmbeddedDocument, tD) + reg.RegisterInterfaceDecoder(tValueUnmarshaler, ValueDecoderFunc(valueUnmarshalerDecodeValue)) + reg.RegisterInterfaceDecoder(tUnmarshaler, ValueDecoderFunc(unmarshalerDecodeValue)) +} + +// dDecodeValue is the ValueDecoderFunc for D instances. +func dDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.IsValid() || !val.CanSet() || val.Type() != tD { + return ValueDecoderError{Name: "DDecodeValue", Kinds: []reflect.Kind{reflect.Slice}, Received: val} + } + + switch vrType := vr.Type(); vrType { + case Type(0), TypeEmbeddedDocument: + break + case TypeNull: + val.Set(reflect.Zero(val.Type())) + return vr.ReadNull() + default: + return fmt.Errorf("cannot decode %v into a D", vrType) + } + + dr, err := vr.ReadDocument() + if err != nil { + return err + } + + decoder, err := dc.LookupDecoder(tEmpty) + if err != nil { + return err + } + + // Use the elements in the provided value if it's non nil. Otherwise, allocate a new D instance. + var elems D + if !val.IsNil() { + val.SetLen(0) + elems = val.Interface().(D) + } else { + elems = make(D, 0) + } + + for { + key, elemVr, err := dr.ReadElement() + if errors.Is(err, ErrEOD) { + break + } else if err != nil { + return err + } + + var v interface{} + err = decoder.DecodeValue(dc, elemVr, reflect.ValueOf(&v).Elem()) + if err != nil { + return err + } + + elems = append(elems, E{Key: key, Value: v}) + } + + val.Set(reflect.ValueOf(elems)) + return nil +} + +func booleanDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t.Kind() != reflect.Bool { + return emptyValue, ValueDecoderError{ + Name: "BooleanDecodeValue", + Kinds: []reflect.Kind{reflect.Bool}, + Received: reflect.Zero(t), + } + } + + var b bool + var err error + switch vrType := vr.Type(); vrType { + case TypeInt32: + i32, err := vr.ReadInt32() + if err != nil { + return emptyValue, err + } + b = (i32 != 0) + case TypeInt64: + i64, err := vr.ReadInt64() + if err != nil { + return emptyValue, err + } + b = (i64 != 0) + case TypeDouble: + f64, err := vr.ReadDouble() + if err != nil { + return emptyValue, err + } + b = (f64 != 0) + case TypeBoolean: + b, err = vr.ReadBoolean() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a boolean", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(b), nil +} + +// booleanDecodeValue is the ValueDecoderFunc for bool types. +func booleanDecodeValue(dctx DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.IsValid() || !val.CanSet() || val.Kind() != reflect.Bool { + return ValueDecoderError{Name: "BooleanDecodeValue", Kinds: []reflect.Kind{reflect.Bool}, Received: val} + } + + elem, err := booleanDecodeType(dctx, vr, val.Type()) + if err != nil { + return err + } + + val.SetBool(elem.Bool()) + return nil +} + +func intDecodeType(dc DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + var i64 int64 + var err error + switch vrType := vr.Type(); vrType { + case TypeInt32: + i32, err := vr.ReadInt32() + if err != nil { + return emptyValue, err + } + i64 = int64(i32) + case TypeInt64: + i64, err = vr.ReadInt64() + if err != nil { + return emptyValue, err + } + case TypeDouble: + f64, err := vr.ReadDouble() + if err != nil { + return emptyValue, err + } + if !dc.truncate && math.Floor(f64) != f64 { + return emptyValue, errCannotTruncate + } + if f64 > float64(math.MaxInt64) { + return emptyValue, fmt.Errorf("%g overflows int64", f64) + } + i64 = int64(f64) + case TypeBoolean: + b, err := vr.ReadBoolean() + if err != nil { + return emptyValue, err + } + if b { + i64 = 1 + } + case TypeNull: + if err = vr.ReadNull(); err != nil { + return emptyValue, err + } + case TypeUndefined: + if err = vr.ReadUndefined(); err != nil { + return emptyValue, err + } + default: + return emptyValue, fmt.Errorf("cannot decode %v into an integer type", vrType) + } + + switch t.Kind() { + case reflect.Int8: + if i64 < math.MinInt8 || i64 > math.MaxInt8 { + return emptyValue, fmt.Errorf("%d overflows int8", i64) + } + + return reflect.ValueOf(int8(i64)), nil + case reflect.Int16: + if i64 < math.MinInt16 || i64 > math.MaxInt16 { + return emptyValue, fmt.Errorf("%d overflows int16", i64) + } + + return reflect.ValueOf(int16(i64)), nil + case reflect.Int32: + if i64 < math.MinInt32 || i64 > math.MaxInt32 { + return emptyValue, fmt.Errorf("%d overflows int32", i64) + } + + return reflect.ValueOf(int32(i64)), nil + case reflect.Int64: + return reflect.ValueOf(i64), nil + case reflect.Int: + if i64 > math.MaxInt { // Can we fit this inside of an int + return emptyValue, fmt.Errorf("%d overflows int", i64) + } + + return reflect.ValueOf(int(i64)), nil + default: + return emptyValue, ValueDecoderError{ + Name: "IntDecodeValue", + Kinds: []reflect.Kind{reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int}, + Received: reflect.Zero(t), + } + } +} + +// intDecodeValue is the ValueDecoderFunc for int types. +func intDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() { + return ValueDecoderError{ + Name: "IntDecodeValue", + Kinds: []reflect.Kind{reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int}, + Received: val, + } + } + + elem, err := intDecodeType(dc, vr, val.Type()) + if err != nil { + return err + } + + val.SetInt(elem.Int()) + return nil +} + +func floatDecodeType(dc DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + var f float64 + var err error + switch vrType := vr.Type(); vrType { + case TypeInt32: + i32, err := vr.ReadInt32() + if err != nil { + return emptyValue, err + } + f = float64(i32) + case TypeInt64: + i64, err := vr.ReadInt64() + if err != nil { + return emptyValue, err + } + f = float64(i64) + case TypeDouble: + f, err = vr.ReadDouble() + if err != nil { + return emptyValue, err + } + case TypeBoolean: + b, err := vr.ReadBoolean() + if err != nil { + return emptyValue, err + } + if b { + f = 1 + } + case TypeNull: + if err = vr.ReadNull(); err != nil { + return emptyValue, err + } + case TypeUndefined: + if err = vr.ReadUndefined(); err != nil { + return emptyValue, err + } + default: + return emptyValue, fmt.Errorf("cannot decode %v into a float32 or float64 type", vrType) + } + + switch t.Kind() { + case reflect.Float32: + if !dc.truncate && float64(float32(f)) != f { + return emptyValue, errCannotTruncate + } + + return reflect.ValueOf(float32(f)), nil + case reflect.Float64: + return reflect.ValueOf(f), nil + default: + return emptyValue, ValueDecoderError{ + Name: "FloatDecodeValue", + Kinds: []reflect.Kind{reflect.Float32, reflect.Float64}, + Received: reflect.Zero(t), + } + } +} + +// floatDecodeValue is the ValueDecoderFunc for float types. +func floatDecodeValue(ec DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() { + return ValueDecoderError{ + Name: "FloatDecodeValue", + Kinds: []reflect.Kind{reflect.Float32, reflect.Float64}, + Received: val, + } + } + + elem, err := floatDecodeType(ec, vr, val.Type()) + if err != nil { + return err + } + + val.SetFloat(elem.Float()) + return nil +} + +func javaScriptDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tJavaScript { + return emptyValue, ValueDecoderError{ + Name: "JavaScriptDecodeValue", + Types: []reflect.Type{tJavaScript}, + Received: reflect.Zero(t), + } + } + + var js string + var err error + switch vrType := vr.Type(); vrType { + case TypeJavaScript: + js, err = vr.ReadJavascript() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a JavaScript", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(JavaScript(js)), nil +} + +// javaScriptDecodeValue is the ValueDecoderFunc for the JavaScript type. +func javaScriptDecodeValue(dctx DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tJavaScript { + return ValueDecoderError{Name: "JavaScriptDecodeValue", Types: []reflect.Type{tJavaScript}, Received: val} + } + + elem, err := javaScriptDecodeType(dctx, vr, tJavaScript) + if err != nil { + return err + } + + val.SetString(elem.String()) + return nil +} + +func symbolDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tSymbol { + return emptyValue, ValueDecoderError{ + Name: "SymbolDecodeValue", + Types: []reflect.Type{tSymbol}, + Received: reflect.Zero(t), + } + } + + var symbol string + var err error + switch vrType := vr.Type(); vrType { + case TypeString: + symbol, err = vr.ReadString() + case TypeSymbol: + symbol, err = vr.ReadSymbol() + case TypeBinary: + data, subtype, err := vr.ReadBinary() + if err != nil { + return emptyValue, err + } + + if subtype != TypeBinaryGeneric && subtype != TypeBinaryBinaryOld { + return emptyValue, decodeBinaryError{subtype: subtype, typeName: "Symbol"} + } + symbol = string(data) + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a Symbol", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(Symbol(symbol)), nil +} + +// symbolDecodeValue is the ValueDecoderFunc for the Symbol type. +func symbolDecodeValue(dctx DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tSymbol { + return ValueDecoderError{Name: "SymbolDecodeValue", Types: []reflect.Type{tSymbol}, Received: val} + } + + elem, err := symbolDecodeType(dctx, vr, tSymbol) + if err != nil { + return err + } + + val.SetString(elem.String()) + return nil +} + +func binaryDecode(vr ValueReader) (Binary, error) { + var b Binary + + var data []byte + var subtype byte + var err error + switch vrType := vr.Type(); vrType { + case TypeBinary: + data, subtype, err = vr.ReadBinary() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return b, fmt.Errorf("cannot decode %v into a Binary", vrType) + } + if err != nil { + return b, err + } + b.Subtype = subtype + b.Data = data + + return b, nil +} + +func binaryDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tBinary { + return emptyValue, ValueDecoderError{ + Name: "BinaryDecodeValue", + Types: []reflect.Type{tBinary}, + Received: reflect.Zero(t), + } + } + + b, err := binaryDecode(vr) + if err != nil { + return emptyValue, err + } + return reflect.ValueOf(b), nil +} + +// binaryDecodeValue is the ValueDecoderFunc for Binary. +func binaryDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tBinary { + return ValueDecoderError{Name: "BinaryDecodeValue", Types: []reflect.Type{tBinary}, Received: val} + } + + elem, err := binaryDecodeType(dc, vr, tBinary) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func vectorDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tVector { + return emptyValue, ValueDecoderError{ + Name: "VectorDecodeValue", + Types: []reflect.Type{tVector}, + Received: reflect.Zero(t), + } + } + + b, err := binaryDecode(vr) + if err != nil { + return emptyValue, err + } + + v, err := NewVectorFromBinary(b) + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(v), nil +} + +// vectorDecodeValue is the ValueDecoderFunc for Vector. +func vectorDecodeValue(dctx DecodeContext, vr ValueReader, val reflect.Value) error { + t := val.Type() + if !val.CanSet() || t != tVector { + return ValueDecoderError{ + Name: "VectorDecodeValue", + Types: []reflect.Type{tVector}, + Received: val, + } + } + + elem, err := vectorDecodeType(dctx, vr, t) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func undefinedDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tUndefined { + return emptyValue, ValueDecoderError{ + Name: "UndefinedDecodeValue", + Types: []reflect.Type{tUndefined}, + Received: reflect.Zero(t), + } + } + + var err error + switch vrType := vr.Type(); vrType { + case TypeUndefined: + err = vr.ReadUndefined() + case TypeNull: + err = vr.ReadNull() + default: + return emptyValue, fmt.Errorf("cannot decode %v into an Undefined", vr.Type()) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(Undefined{}), nil +} + +// undefinedDecodeValue is the ValueDecoderFunc for Undefined. +func undefinedDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tUndefined { + return ValueDecoderError{Name: "UndefinedDecodeValue", Types: []reflect.Type{tUndefined}, Received: val} + } + + elem, err := undefinedDecodeType(dc, vr, tUndefined) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +// Accept both 12-byte string and pretty-printed 24-byte hex string formats. +func objectIDDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tOID { + return emptyValue, ValueDecoderError{ + Name: "ObjectIDDecodeValue", + Types: []reflect.Type{tOID}, + Received: reflect.Zero(t), + } + } + + var oid ObjectID + var err error + switch vrType := vr.Type(); vrType { + case TypeObjectID: + oid, err = vr.ReadObjectID() + if err != nil { + return emptyValue, err + } + case TypeString: + str, err := vr.ReadString() + if err != nil { + return emptyValue, err + } + if oid, err = ObjectIDFromHex(str); err == nil { + break + } + if len(str) != 12 { + return emptyValue, fmt.Errorf("an ObjectID string must be exactly 12 bytes long (got %v)", len(str)) + } + byteArr := []byte(str) + copy(oid[:], byteArr) + case TypeNull: + if err = vr.ReadNull(); err != nil { + return emptyValue, err + } + case TypeUndefined: + if err = vr.ReadUndefined(); err != nil { + return emptyValue, err + } + default: + return emptyValue, fmt.Errorf("cannot decode %v into an ObjectID", vrType) + } + + return reflect.ValueOf(oid), nil +} + +// objectIDDecodeValue is the ValueDecoderFunc for ObjectID. +func objectIDDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tOID { + return ValueDecoderError{Name: "ObjectIDDecodeValue", Types: []reflect.Type{tOID}, Received: val} + } + + elem, err := objectIDDecodeType(dc, vr, tOID) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func dateTimeDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tDateTime { + return emptyValue, ValueDecoderError{ + Name: "DateTimeDecodeValue", + Types: []reflect.Type{tDateTime}, + Received: reflect.Zero(t), + } + } + + var dt int64 + var err error + switch vrType := vr.Type(); vrType { + case TypeDateTime: + dt, err = vr.ReadDateTime() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a DateTime", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(DateTime(dt)), nil +} + +// dateTimeDecodeValue is the ValueDecoderFunc for DateTime. +func dateTimeDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tDateTime { + return ValueDecoderError{Name: "DateTimeDecodeValue", Types: []reflect.Type{tDateTime}, Received: val} + } + + elem, err := dateTimeDecodeType(dc, vr, tDateTime) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func nullDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tNull { + return emptyValue, ValueDecoderError{ + Name: "NullDecodeValue", + Types: []reflect.Type{tNull}, + Received: reflect.Zero(t), + } + } + + var err error + switch vrType := vr.Type(); vrType { + case TypeUndefined: + err = vr.ReadUndefined() + case TypeNull: + err = vr.ReadNull() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a Null", vr.Type()) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(Null{}), nil +} + +// nullDecodeValue is the ValueDecoderFunc for Null. +func nullDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tNull { + return ValueDecoderError{Name: "NullDecodeValue", Types: []reflect.Type{tNull}, Received: val} + } + + elem, err := nullDecodeType(dc, vr, tNull) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func regexDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tRegex { + return emptyValue, ValueDecoderError{ + Name: "RegexDecodeValue", + Types: []reflect.Type{tRegex}, + Received: reflect.Zero(t), + } + } + + var pattern, options string + var err error + switch vrType := vr.Type(); vrType { + case TypeRegex: + pattern, options, err = vr.ReadRegex() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a Regex", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(Regex{Pattern: pattern, Options: options}), nil +} + +// regexDecodeValue is the ValueDecoderFunc for Regex. +func regexDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tRegex { + return ValueDecoderError{Name: "RegexDecodeValue", Types: []reflect.Type{tRegex}, Received: val} + } + + elem, err := regexDecodeType(dc, vr, tRegex) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func dbPointerDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tDBPointer { + return emptyValue, ValueDecoderError{ + Name: "DBPointerDecodeValue", + Types: []reflect.Type{tDBPointer}, + Received: reflect.Zero(t), + } + } + + var ns string + var pointer ObjectID + var err error + switch vrType := vr.Type(); vrType { + case TypeDBPointer: + ns, pointer, err = vr.ReadDBPointer() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a DBPointer", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(DBPointer{DB: ns, Pointer: pointer}), nil +} + +// dbPointerDecodeValue is the ValueDecoderFunc for DBPointer. +func dbPointerDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tDBPointer { + return ValueDecoderError{Name: "DBPointerDecodeValue", Types: []reflect.Type{tDBPointer}, Received: val} + } + + elem, err := dbPointerDecodeType(dc, vr, tDBPointer) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func timestampDecodeType(_ DecodeContext, vr ValueReader, reflectType reflect.Type) (reflect.Value, error) { + if reflectType != tTimestamp { + return emptyValue, ValueDecoderError{ + Name: "TimestampDecodeValue", + Types: []reflect.Type{tTimestamp}, + Received: reflect.Zero(reflectType), + } + } + + var t, incr uint32 + var err error + switch vrType := vr.Type(); vrType { + case TypeTimestamp: + t, incr, err = vr.ReadTimestamp() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a Timestamp", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(Timestamp{T: t, I: incr}), nil +} + +// timestampDecodeValue is the ValueDecoderFunc for Timestamp. +func timestampDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tTimestamp { + return ValueDecoderError{Name: "TimestampDecodeValue", Types: []reflect.Type{tTimestamp}, Received: val} + } + + elem, err := timestampDecodeType(dc, vr, tTimestamp) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func minKeyDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tMinKey { + return emptyValue, ValueDecoderError{ + Name: "MinKeyDecodeValue", + Types: []reflect.Type{tMinKey}, + Received: reflect.Zero(t), + } + } + + var err error + switch vrType := vr.Type(); vrType { + case TypeMinKey: + err = vr.ReadMinKey() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a MinKey", vr.Type()) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(MinKey{}), nil +} + +// minKeyDecodeValue is the ValueDecoderFunc for MinKey. +func minKeyDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tMinKey { + return ValueDecoderError{Name: "MinKeyDecodeValue", Types: []reflect.Type{tMinKey}, Received: val} + } + + elem, err := minKeyDecodeType(dc, vr, tMinKey) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func maxKeyDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tMaxKey { + return emptyValue, ValueDecoderError{ + Name: "MaxKeyDecodeValue", + Types: []reflect.Type{tMaxKey}, + Received: reflect.Zero(t), + } + } + + var err error + switch vrType := vr.Type(); vrType { + case TypeMaxKey: + err = vr.ReadMaxKey() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a MaxKey", vr.Type()) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(MaxKey{}), nil +} + +// maxKeyDecodeValue is the ValueDecoderFunc for MaxKey. +func maxKeyDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tMaxKey { + return ValueDecoderError{Name: "MaxKeyDecodeValue", Types: []reflect.Type{tMaxKey}, Received: val} + } + + elem, err := maxKeyDecodeType(dc, vr, tMaxKey) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func decimal128DecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tDecimal { + return emptyValue, ValueDecoderError{ + Name: "Decimal128DecodeValue", + Types: []reflect.Type{tDecimal}, + Received: reflect.Zero(t), + } + } + + var d128 Decimal128 + var err error + switch vrType := vr.Type(); vrType { + case TypeDecimal128: + d128, err = vr.ReadDecimal128() + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a Decimal128", vr.Type()) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(d128), nil +} + +// decimal128DecodeValue is the ValueDecoderFunc for Decimal128. +func decimal128DecodeValue(dctx DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tDecimal { + return ValueDecoderError{Name: "Decimal128DecodeValue", Types: []reflect.Type{tDecimal}, Received: val} + } + + elem, err := decimal128DecodeType(dctx, vr, tDecimal) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func jsonNumberDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tJSONNumber { + return emptyValue, ValueDecoderError{ + Name: "JSONNumberDecodeValue", + Types: []reflect.Type{tJSONNumber}, + Received: reflect.Zero(t), + } + } + + var jsonNum json.Number + var err error + switch vrType := vr.Type(); vrType { + case TypeDouble: + f64, err := vr.ReadDouble() + if err != nil { + return emptyValue, err + } + jsonNum = json.Number(strconv.FormatFloat(f64, 'f', -1, 64)) + case TypeInt32: + i32, err := vr.ReadInt32() + if err != nil { + return emptyValue, err + } + jsonNum = json.Number(strconv.FormatInt(int64(i32), 10)) + case TypeInt64: + i64, err := vr.ReadInt64() + if err != nil { + return emptyValue, err + } + jsonNum = json.Number(strconv.FormatInt(i64, 10)) + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a json.Number", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(jsonNum), nil +} + +// jsonNumberDecodeValue is the ValueDecoderFunc for json.Number. +func jsonNumberDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tJSONNumber { + return ValueDecoderError{Name: "JSONNumberDecodeValue", Types: []reflect.Type{tJSONNumber}, Received: val} + } + + elem, err := jsonNumberDecodeType(dc, vr, tJSONNumber) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func urlDecodeType(_ DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tURL { + return emptyValue, ValueDecoderError{ + Name: "URLDecodeValue", + Types: []reflect.Type{tURL}, + Received: reflect.Zero(t), + } + } + + urlPtr := &url.URL{} + var err error + switch vrType := vr.Type(); vrType { + case TypeString: + var str string // Declare str here to avoid shadowing err during the ReadString call. + str, err = vr.ReadString() + if err != nil { + return emptyValue, err + } + + urlPtr, err = url.Parse(str) + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a *url.URL", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(urlPtr).Elem(), nil +} + +// urlDecodeValue is the ValueDecoderFunc for url.URL. +func urlDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tURL { + return ValueDecoderError{Name: "URLDecodeValue", Types: []reflect.Type{tURL}, Received: val} + } + + elem, err := urlDecodeType(dc, vr, tURL) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +// arrayDecodeValue is the ValueDecoderFunc for array types. +func arrayDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.IsValid() || val.Kind() != reflect.Array { + return ValueDecoderError{Name: "ArrayDecodeValue", Kinds: []reflect.Kind{reflect.Array}, Received: val} + } + + switch vrType := vr.Type(); vrType { + case TypeArray: + case Type(0), TypeEmbeddedDocument: + if val.Type().Elem() != tE { + return fmt.Errorf("cannot decode document into %s", val.Type()) + } + case TypeBinary: + if val.Type().Elem() != tByte { + return fmt.Errorf("ArrayDecodeValue can only be used to decode binary into a byte array, got %v", vrType) + } + data, subtype, err := vr.ReadBinary() + if err != nil { + return err + } + if subtype != TypeBinaryGeneric && subtype != TypeBinaryBinaryOld { + return fmt.Errorf("ArrayDecodeValue can only be used to decode subtype 0x00 or 0x02 for %s, got %v", TypeBinary, subtype) + } + + if len(data) > val.Len() { + return fmt.Errorf("more elements returned in array than can fit inside %s", val.Type()) + } + + for idx, elem := range data { + val.Index(idx).Set(reflect.ValueOf(elem)) + } + return nil + case TypeNull: + val.Set(reflect.Zero(val.Type())) + return vr.ReadNull() + case TypeUndefined: + val.Set(reflect.Zero(val.Type())) + return vr.ReadUndefined() + default: + return fmt.Errorf("cannot decode %v into an array", vrType) + } + + var elemsFunc func(DecodeContext, ValueReader, reflect.Value) ([]reflect.Value, error) + switch val.Type().Elem() { + case tE: + elemsFunc = decodeD + default: + elemsFunc = decodeDefault + } + + elems, err := elemsFunc(dc, vr, val) + if err != nil { + return err + } + + if len(elems) > val.Len() { + return fmt.Errorf("more elements returned in array than can fit inside %s, got %v elements", val.Type(), len(elems)) + } + + for idx, elem := range elems { + val.Index(idx).Set(elem) + } + + return nil +} + +// valueUnmarshalerDecodeValue is the ValueDecoderFunc for ValueUnmarshaler implementations. +func valueUnmarshalerDecodeValue(_ DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.IsValid() || (!val.Type().Implements(tValueUnmarshaler) && !reflect.PtrTo(val.Type()).Implements(tValueUnmarshaler)) { + return ValueDecoderError{Name: "ValueUnmarshalerDecodeValue", Types: []reflect.Type{tValueUnmarshaler}, Received: val} + } + + // If BSON value is null and the go value is a pointer, then don't call + // UnmarshalBSONValue. Even if the Go pointer is already initialized (i.e., + // non-nil), encountering null in BSON will result in the pointer being + // directly set to nil here. Since the pointer is being replaced with nil, + // there is no opportunity (or reason) for the custom UnmarshalBSONValue logic + // to be called. + if vr.Type() == TypeNull && val.Kind() == reflect.Ptr { + val.Set(reflect.Zero(val.Type())) + + return vr.ReadNull() + } + + if val.Kind() == reflect.Ptr && val.IsNil() { + if !val.CanSet() { + return ValueDecoderError{Name: "ValueUnmarshalerDecodeValue", Types: []reflect.Type{tValueUnmarshaler}, Received: val} + } + val.Set(reflect.New(val.Type().Elem())) + } + + if !val.Type().Implements(tValueUnmarshaler) { + if !val.CanAddr() { + return ValueDecoderError{Name: "ValueUnmarshalerDecodeValue", Types: []reflect.Type{tValueUnmarshaler}, Received: val} + } + val = val.Addr() // If the type doesn't implement the interface, a pointer to it must. + } + + t, src, err := copyValueToBytes(vr) + if err != nil { + return err + } + + m, ok := val.Interface().(ValueUnmarshaler) + if !ok { + // NB: this error should be unreachable due to the above checks + return ValueDecoderError{Name: "ValueUnmarshalerDecodeValue", Types: []reflect.Type{tValueUnmarshaler}, Received: val} + } + return m.UnmarshalBSONValue(byte(t), src) +} + +// unmarshalerDecodeValue is the ValueDecoderFunc for Unmarshaler implementations. +func unmarshalerDecodeValue(_ DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.IsValid() || (!val.Type().Implements(tUnmarshaler) && !reflect.PtrTo(val.Type()).Implements(tUnmarshaler)) { + return ValueDecoderError{Name: "UnmarshalerDecodeValue", Types: []reflect.Type{tUnmarshaler}, Received: val} + } + + if val.Kind() == reflect.Ptr && val.IsNil() { + if !val.CanSet() { + return ValueDecoderError{Name: "UnmarshalerDecodeValue", Types: []reflect.Type{tUnmarshaler}, Received: val} + } + val.Set(reflect.New(val.Type().Elem())) + } + + _, src, err := copyValueToBytes(vr) + if err != nil { + return err + } + + // If the target Go value is a pointer and the BSON field value is empty, set the value to the + // zero value of the pointer (nil) and don't call UnmarshalBSON. UnmarshalBSON has no way to + // change the pointer value from within the function (only the value at the pointer address), + // so it can't set the pointer to "nil" itself. Since the most common Go value for an empty BSON + // field value is "nil", we set "nil" here and don't call UnmarshalBSON. This behavior matches + // the behavior of the Go "encoding/json" unmarshaler when the target Go value is a pointer and + // the JSON field value is "null". + if val.Kind() == reflect.Ptr && len(src) == 0 { + val.Set(reflect.Zero(val.Type())) + return nil + } + + if !val.Type().Implements(tUnmarshaler) { + if !val.CanAddr() { + return ValueDecoderError{Name: "UnmarshalerDecodeValue", Types: []reflect.Type{tUnmarshaler}, Received: val} + } + val = val.Addr() // If the type doesn't implement the interface, a pointer to it must. + } + + m, ok := val.Interface().(Unmarshaler) + if !ok { + // NB: this error should be unreachable due to the above checks + return ValueDecoderError{Name: "UnmarshalerDecodeValue", Types: []reflect.Type{tUnmarshaler}, Received: val} + } + return m.UnmarshalBSON(src) +} + +// coreDocumentDecodeValue is the ValueDecoderFunc for bsoncore.Document. +func coreDocumentDecodeValue(_ DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tCoreDocument { + return ValueDecoderError{Name: "CoreDocumentDecodeValue", Types: []reflect.Type{tCoreDocument}, Received: val} + } + + if val.IsNil() { + val.Set(reflect.MakeSlice(val.Type(), 0, 0)) + } + + val.SetLen(0) + + cdoc, err := appendDocumentBytes(val.Interface().(bsoncore.Document), vr) + val.Set(reflect.ValueOf(cdoc)) + return err +} + +func decodeDefault(dc DecodeContext, vr ValueReader, val reflect.Value) ([]reflect.Value, error) { + elems := make([]reflect.Value, 0) + + ar, err := vr.ReadArray() + if err != nil { + return nil, err + } + + eType := val.Type().Elem() + + isInterfaceSlice := eType.Kind() == reflect.Interface && val.Len() > 0 + + // If this is not an interface slice with pre-populated elements, we can look up + // the decoder for eType once. + var vDecoder ValueDecoder + if !isInterfaceSlice { + vDecoder, err = dc.LookupDecoder(eType) + if err != nil { + return nil, err + } + } + + idx := 0 + for { + vr, err := ar.ReadValue() + if errors.Is(err, ErrEOA) { + break + } + if err != nil { + return nil, err + } + + var elem reflect.Value + if isInterfaceSlice && idx < val.Len() { + // Decode into an existing interface{} slot. + + elem = val.Index(idx).Elem() + switch { + case elem.Kind() != reflect.Ptr || elem.IsNil(): + valueDecoder, err := dc.LookupDecoder(elem.Type()) + if err != nil { + return nil, err + } + + // If an element is allocated and unsettable, it must be overwritten. + if !elem.CanSet() { + elem = reflect.New(elem.Type()).Elem() + } + + err = valueDecoder.DecodeValue(dc, vr, elem) + if err != nil { + return nil, newDecodeError(strconv.Itoa(idx), err) + } + case vr.Type() == TypeNull: + if err = vr.ReadNull(); err != nil { + return nil, err + } + elem = reflect.Zero(val.Index(idx).Type()) + default: + e := elem.Elem() + valueDecoder, err := dc.LookupDecoder(e.Type()) + if err != nil { + return nil, err + } + err = valueDecoder.DecodeValue(dc, vr, e) + if err != nil { + return nil, newDecodeError(strconv.Itoa(idx), err) + } + } + } else { + // For non-interface slices, or if we've exhausted the pre-populated + // slots, we create a fresh value. + + if vDecoder == nil { + vDecoder, err = dc.LookupDecoder(eType) + if err != nil { + return nil, err + } + } + elem, err = decodeTypeOrValueWithInfo(vDecoder, dc, vr, eType) + if err != nil { + return nil, newDecodeError(strconv.Itoa(idx), err) + } + } + + elems = append(elems, elem) + idx++ + } + + return elems, nil +} + +func codeWithScopeDecodeType(dc DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tCodeWithScope { + return emptyValue, ValueDecoderError{ + Name: "CodeWithScopeDecodeValue", + Types: []reflect.Type{tCodeWithScope}, + Received: reflect.Zero(t), + } + } + + var cws CodeWithScope + var err error + switch vrType := vr.Type(); vrType { + case TypeCodeWithScope: + code, dr, err := vr.ReadCodeWithScope() + if err != nil { + return emptyValue, err + } + + scope := reflect.New(tD).Elem() + elems, err := decodeElemsFromDocumentReader(dc, dr) + if err != nil { + return emptyValue, err + } + + scope.Set(reflect.MakeSlice(tD, 0, len(elems))) + scope.Set(reflect.Append(scope, elems...)) + + cws = CodeWithScope{ + Code: JavaScript(code), + Scope: scope.Interface().(D), + } + case TypeNull: + err = vr.ReadNull() + case TypeUndefined: + err = vr.ReadUndefined() + default: + return emptyValue, fmt.Errorf("cannot decode %v into a CodeWithScope", vrType) + } + if err != nil { + return emptyValue, err + } + + return reflect.ValueOf(cws), nil +} + +// codeWithScopeDecodeValue is the ValueDecoderFunc for CodeWithScope. +func codeWithScopeDecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tCodeWithScope { + return ValueDecoderError{Name: "CodeWithScopeDecodeValue", Types: []reflect.Type{tCodeWithScope}, Received: val} + } + + elem, err := codeWithScopeDecodeType(dc, vr, tCodeWithScope) + if err != nil { + return err + } + + val.Set(elem) + return nil +} + +func decodeD(dc DecodeContext, vr ValueReader, _ reflect.Value) ([]reflect.Value, error) { + switch vr.Type() { + case Type(0), TypeEmbeddedDocument: + default: + return nil, fmt.Errorf("cannot decode %v into a D", vr.Type()) + } + + dr, err := vr.ReadDocument() + if err != nil { + return nil, err + } + + return decodeElemsFromDocumentReader(dc, dr) +} + +func decodeElemsFromDocumentReader(dc DecodeContext, dr DocumentReader) ([]reflect.Value, error) { + decoder, err := dc.LookupDecoder(tEmpty) + if err != nil { + return nil, err + } + + elems := make([]reflect.Value, 0) + for { + key, vr, err := dr.ReadElement() + if errors.Is(err, ErrEOD) { + break + } + if err != nil { + return nil, err + } + + val := reflect.New(tEmpty).Elem() + err = decoder.DecodeValue(dc, vr, val) + if err != nil { + return nil, newDecodeError(key, err) + } + + elems = append(elems, reflect.ValueOf(E{Key: key, Value: val.Interface()})) + } + + return elems, nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_encoders.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_encoders.go new file mode 100644 index 00000000..bd5a20f2 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/default_value_encoders.go @@ -0,0 +1,517 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "encoding/json" + "errors" + "math" + "net/url" + "reflect" + "sync" + + "go.mongodb.org/mongo-driver/v2/x/bsonx/bsoncore" +) + +var bvwPool = sync.Pool{ + New: func() interface{} { + return new(valueWriter) + }, +} + +var errInvalidValue = errors.New("cannot encode invalid element") + +var sliceWriterPool = sync.Pool{ + New: func() interface{} { + sw := make(sliceWriter, 0) + return &sw + }, +} + +func encodeElement(ec EncodeContext, dw DocumentWriter, e E) error { + vw, err := dw.WriteDocumentElement(e.Key) + if err != nil { + return err + } + + if e.Value == nil { + return vw.WriteNull() + } + encoder, err := ec.LookupEncoder(reflect.TypeOf(e.Value)) + if err != nil { + return err + } + + err = encoder.EncodeValue(ec, vw, reflect.ValueOf(e.Value)) + if err != nil { + return err + } + return nil +} + +// registerDefaultEncoders will register the encoder methods attached to DefaultValueEncoders with +// the provided RegistryBuilder. +func registerDefaultEncoders(reg *Registry) { + mapEncoder := &mapCodec{} + uintCodec := &uintCodec{} + + reg.RegisterTypeEncoder(tByteSlice, &byteSliceCodec{}) + reg.RegisterTypeEncoder(tTime, &timeCodec{}) + reg.RegisterTypeEncoder(tEmpty, &emptyInterfaceCodec{}) + reg.RegisterTypeEncoder(tCoreArray, &arrayCodec{}) + reg.RegisterTypeEncoder(tOID, ValueEncoderFunc(objectIDEncodeValue)) + reg.RegisterTypeEncoder(tDecimal, ValueEncoderFunc(decimal128EncodeValue)) + reg.RegisterTypeEncoder(tJSONNumber, ValueEncoderFunc(jsonNumberEncodeValue)) + reg.RegisterTypeEncoder(tURL, ValueEncoderFunc(urlEncodeValue)) + reg.RegisterTypeEncoder(tJavaScript, ValueEncoderFunc(javaScriptEncodeValue)) + reg.RegisterTypeEncoder(tSymbol, ValueEncoderFunc(symbolEncodeValue)) + reg.RegisterTypeEncoder(tBinary, ValueEncoderFunc(binaryEncodeValue)) + reg.RegisterTypeEncoder(tVector, ValueEncoderFunc(vectorEncodeValue)) + reg.RegisterTypeEncoder(tUndefined, ValueEncoderFunc(undefinedEncodeValue)) + reg.RegisterTypeEncoder(tDateTime, ValueEncoderFunc(dateTimeEncodeValue)) + reg.RegisterTypeEncoder(tNull, ValueEncoderFunc(nullEncodeValue)) + reg.RegisterTypeEncoder(tRegex, ValueEncoderFunc(regexEncodeValue)) + reg.RegisterTypeEncoder(tDBPointer, ValueEncoderFunc(dbPointerEncodeValue)) + reg.RegisterTypeEncoder(tTimestamp, ValueEncoderFunc(timestampEncodeValue)) + reg.RegisterTypeEncoder(tMinKey, ValueEncoderFunc(minKeyEncodeValue)) + reg.RegisterTypeEncoder(tMaxKey, ValueEncoderFunc(maxKeyEncodeValue)) + reg.RegisterTypeEncoder(tCoreDocument, ValueEncoderFunc(coreDocumentEncodeValue)) + reg.RegisterTypeEncoder(tCodeWithScope, ValueEncoderFunc(codeWithScopeEncodeValue)) + reg.RegisterKindEncoder(reflect.Bool, ValueEncoderFunc(booleanEncodeValue)) + reg.RegisterKindEncoder(reflect.Int, ValueEncoderFunc(intEncodeValue)) + reg.RegisterKindEncoder(reflect.Int8, ValueEncoderFunc(intEncodeValue)) + reg.RegisterKindEncoder(reflect.Int16, ValueEncoderFunc(intEncodeValue)) + reg.RegisterKindEncoder(reflect.Int32, ValueEncoderFunc(intEncodeValue)) + reg.RegisterKindEncoder(reflect.Int64, ValueEncoderFunc(intEncodeValue)) + reg.RegisterKindEncoder(reflect.Uint, uintCodec) + reg.RegisterKindEncoder(reflect.Uint8, uintCodec) + reg.RegisterKindEncoder(reflect.Uint16, uintCodec) + reg.RegisterKindEncoder(reflect.Uint32, uintCodec) + reg.RegisterKindEncoder(reflect.Uint64, uintCodec) + reg.RegisterKindEncoder(reflect.Float32, ValueEncoderFunc(floatEncodeValue)) + reg.RegisterKindEncoder(reflect.Float64, ValueEncoderFunc(floatEncodeValue)) + reg.RegisterKindEncoder(reflect.Array, ValueEncoderFunc(arrayEncodeValue)) + reg.RegisterKindEncoder(reflect.Map, mapEncoder) + reg.RegisterKindEncoder(reflect.Slice, &sliceCodec{}) + reg.RegisterKindEncoder(reflect.String, &stringCodec{}) + reg.RegisterKindEncoder(reflect.Struct, newStructCodec(mapEncoder)) + reg.RegisterKindEncoder(reflect.Ptr, &pointerCodec{}) + reg.RegisterInterfaceEncoder(tValueMarshaler, ValueEncoderFunc(valueMarshalerEncodeValue)) + reg.RegisterInterfaceEncoder(tMarshaler, ValueEncoderFunc(marshalerEncodeValue)) +} + +// booleanEncodeValue is the ValueEncoderFunc for bool types. +func booleanEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Kind() != reflect.Bool { + return ValueEncoderError{Name: "BooleanEncodeValue", Kinds: []reflect.Kind{reflect.Bool}, Received: val} + } + return vw.WriteBoolean(val.Bool()) +} + +func fitsIn32Bits(i int64) bool { + return math.MinInt32 <= i && i <= math.MaxInt32 +} + +// intEncodeValue is the ValueEncoderFunc for int types. +func intEncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + switch val.Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32: + return vw.WriteInt32(int32(val.Int())) + case reflect.Int: + i64 := val.Int() + if fitsIn32Bits(i64) { + return vw.WriteInt32(int32(i64)) + } + return vw.WriteInt64(i64) + case reflect.Int64: + i64 := val.Int() + if ec.minSize && fitsIn32Bits(i64) { + return vw.WriteInt32(int32(i64)) + } + return vw.WriteInt64(i64) + } + + return ValueEncoderError{ + Name: "IntEncodeValue", + Kinds: []reflect.Kind{reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int}, + Received: val, + } +} + +// floatEncodeValue is the ValueEncoderFunc for float types. +func floatEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + switch val.Kind() { + case reflect.Float32, reflect.Float64: + return vw.WriteDouble(val.Float()) + } + + return ValueEncoderError{Name: "FloatEncodeValue", Kinds: []reflect.Kind{reflect.Float32, reflect.Float64}, Received: val} +} + +// objectIDEncodeValue is the ValueEncoderFunc for ObjectID. +func objectIDEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tOID { + return ValueEncoderError{Name: "ObjectIDEncodeValue", Types: []reflect.Type{tOID}, Received: val} + } + return vw.WriteObjectID(val.Interface().(ObjectID)) +} + +// decimal128EncodeValue is the ValueEncoderFunc for Decimal128. +func decimal128EncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tDecimal { + return ValueEncoderError{Name: "Decimal128EncodeValue", Types: []reflect.Type{tDecimal}, Received: val} + } + return vw.WriteDecimal128(val.Interface().(Decimal128)) +} + +// jsonNumberEncodeValue is the ValueEncoderFunc for json.Number. +func jsonNumberEncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tJSONNumber { + return ValueEncoderError{Name: "JSONNumberEncodeValue", Types: []reflect.Type{tJSONNumber}, Received: val} + } + jsnum := val.Interface().(json.Number) + + // Attempt int first, then float64 + if i64, err := jsnum.Int64(); err == nil { + return intEncodeValue(ec, vw, reflect.ValueOf(i64)) + } + + f64, err := jsnum.Float64() + if err != nil { + return err + } + + return floatEncodeValue(ec, vw, reflect.ValueOf(f64)) +} + +// urlEncodeValue is the ValueEncoderFunc for url.URL. +func urlEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tURL { + return ValueEncoderError{Name: "URLEncodeValue", Types: []reflect.Type{tURL}, Received: val} + } + u := val.Interface().(url.URL) + return vw.WriteString(u.String()) +} + +// arrayEncodeValue is the ValueEncoderFunc for array types. +func arrayEncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Kind() != reflect.Array { + return ValueEncoderError{Name: "ArrayEncodeValue", Kinds: []reflect.Kind{reflect.Array}, Received: val} + } + + // If we have a []E we want to treat it as a document instead of as an array. + if val.Type().Elem() == tE { + dw, err := vw.WriteDocument() + if err != nil { + return err + } + + for idx := 0; idx < val.Len(); idx++ { + e := val.Index(idx).Interface().(E) + err = encodeElement(ec, dw, e) + if err != nil { + return err + } + } + + return dw.WriteDocumentEnd() + } + + // If we have a []byte we want to treat it as a binary instead of as an array. + if val.Type().Elem() == tByte { + var byteSlice []byte + for idx := 0; idx < val.Len(); idx++ { + byteSlice = append(byteSlice, val.Index(idx).Interface().(byte)) + } + return vw.WriteBinary(byteSlice) + } + + aw, err := vw.WriteArray() + if err != nil { + return err + } + + elemType := val.Type().Elem() + encoder, err := ec.LookupEncoder(elemType) + if err != nil && elemType.Kind() != reflect.Interface { + return err + } + + for idx := 0; idx < val.Len(); idx++ { + currEncoder, currVal, lookupErr := lookupElementEncoder(ec, encoder, val.Index(idx)) + if lookupErr != nil && !errors.Is(lookupErr, errInvalidValue) { + return lookupErr + } + + vw, err := aw.WriteArrayElement() + if err != nil { + return err + } + + if errors.Is(lookupErr, errInvalidValue) { + err = vw.WriteNull() + if err != nil { + return err + } + continue + } + + err = currEncoder.EncodeValue(ec, vw, currVal) + if err != nil { + return err + } + } + return aw.WriteArrayEnd() +} + +func lookupElementEncoder(ec EncodeContext, origEncoder ValueEncoder, currVal reflect.Value) (ValueEncoder, reflect.Value, error) { + if origEncoder != nil || (currVal.Kind() != reflect.Interface) { + return origEncoder, currVal, nil + } + currVal = currVal.Elem() + if !currVal.IsValid() { + return nil, currVal, errInvalidValue + } + currEncoder, err := ec.LookupEncoder(currVal.Type()) + + return currEncoder, currVal, err +} + +// valueMarshalerEncodeValue is the ValueEncoderFunc for ValueMarshaler implementations. +func valueMarshalerEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + // Either val or a pointer to val must implement ValueMarshaler + switch { + case !val.IsValid(): + return ValueEncoderError{Name: "ValueMarshalerEncodeValue", Types: []reflect.Type{tValueMarshaler}, Received: val} + case val.Type().Implements(tValueMarshaler): + // If ValueMarshaler is implemented on a concrete type, make sure that val isn't a nil pointer + if isImplementationNil(val, tValueMarshaler) { + return vw.WriteNull() + } + case reflect.PtrTo(val.Type()).Implements(tValueMarshaler) && val.CanAddr(): + val = val.Addr() + default: + return ValueEncoderError{Name: "ValueMarshalerEncodeValue", Types: []reflect.Type{tValueMarshaler}, Received: val} + } + + m, ok := val.Interface().(ValueMarshaler) + if !ok { + return vw.WriteNull() + } + t, data, err := m.MarshalBSONValue() + if err != nil { + return err + } + return copyValueFromBytes(vw, Type(t), data) +} + +// marshalerEncodeValue is the ValueEncoderFunc for Marshaler implementations. +func marshalerEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + // Either val or a pointer to val must implement Marshaler + switch { + case !val.IsValid(): + return ValueEncoderError{Name: "MarshalerEncodeValue", Types: []reflect.Type{tMarshaler}, Received: val} + case val.Type().Implements(tMarshaler): + // If Marshaler is implemented on a concrete type, make sure that val isn't a nil pointer + if isImplementationNil(val, tMarshaler) { + return vw.WriteNull() + } + case reflect.PtrTo(val.Type()).Implements(tMarshaler) && val.CanAddr(): + val = val.Addr() + default: + return ValueEncoderError{Name: "MarshalerEncodeValue", Types: []reflect.Type{tMarshaler}, Received: val} + } + + m, ok := val.Interface().(Marshaler) + if !ok { + return vw.WriteNull() + } + data, err := m.MarshalBSON() + if err != nil { + return err + } + return copyValueFromBytes(vw, TypeEmbeddedDocument, data) +} + +// javaScriptEncodeValue is the ValueEncoderFunc for the JavaScript type. +func javaScriptEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tJavaScript { + return ValueEncoderError{Name: "JavaScriptEncodeValue", Types: []reflect.Type{tJavaScript}, Received: val} + } + + return vw.WriteJavascript(val.String()) +} + +// symbolEncodeValue is the ValueEncoderFunc for the Symbol type. +func symbolEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tSymbol { + return ValueEncoderError{Name: "SymbolEncodeValue", Types: []reflect.Type{tSymbol}, Received: val} + } + + return vw.WriteSymbol(val.String()) +} + +// binaryEncodeValue is the ValueEncoderFunc for Binary. +func binaryEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tBinary { + return ValueEncoderError{Name: "BinaryEncodeValue", Types: []reflect.Type{tBinary}, Received: val} + } + b := val.Interface().(Binary) + + return vw.WriteBinaryWithSubtype(b.Data, b.Subtype) +} + +// vectorEncodeValue is the ValueEncoderFunc for Vector. +func vectorEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + t := val.Type() + if !val.IsValid() || t != tVector { + return ValueEncoderError{Name: "VectorEncodeValue", + Types: []reflect.Type{tVector}, + Received: val, + } + } + v := val.Interface().(Vector) + b := v.Binary() + return vw.WriteBinaryWithSubtype(b.Data, b.Subtype) +} + +// undefinedEncodeValue is the ValueEncoderFunc for Undefined. +func undefinedEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tUndefined { + return ValueEncoderError{Name: "UndefinedEncodeValue", Types: []reflect.Type{tUndefined}, Received: val} + } + + return vw.WriteUndefined() +} + +// dateTimeEncodeValue is the ValueEncoderFunc for DateTime. +func dateTimeEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tDateTime { + return ValueEncoderError{Name: "DateTimeEncodeValue", Types: []reflect.Type{tDateTime}, Received: val} + } + + return vw.WriteDateTime(val.Int()) +} + +// nullEncodeValue is the ValueEncoderFunc for Null. +func nullEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tNull { + return ValueEncoderError{Name: "NullEncodeValue", Types: []reflect.Type{tNull}, Received: val} + } + + return vw.WriteNull() +} + +// regexEncodeValue is the ValueEncoderFunc for Regex. +func regexEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tRegex { + return ValueEncoderError{Name: "RegexEncodeValue", Types: []reflect.Type{tRegex}, Received: val} + } + + regex := val.Interface().(Regex) + + return vw.WriteRegex(regex.Pattern, regex.Options) +} + +// dbPointerEncodeValue is the ValueEncoderFunc for DBPointer. +func dbPointerEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tDBPointer { + return ValueEncoderError{Name: "DBPointerEncodeValue", Types: []reflect.Type{tDBPointer}, Received: val} + } + + dbp := val.Interface().(DBPointer) + + return vw.WriteDBPointer(dbp.DB, dbp.Pointer) +} + +// timestampEncodeValue is the ValueEncoderFunc for Timestamp. +func timestampEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tTimestamp { + return ValueEncoderError{Name: "TimestampEncodeValue", Types: []reflect.Type{tTimestamp}, Received: val} + } + + ts := val.Interface().(Timestamp) + + return vw.WriteTimestamp(ts.T, ts.I) +} + +// minKeyEncodeValue is the ValueEncoderFunc for MinKey. +func minKeyEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tMinKey { + return ValueEncoderError{Name: "MinKeyEncodeValue", Types: []reflect.Type{tMinKey}, Received: val} + } + + return vw.WriteMinKey() +} + +// maxKeyEncodeValue is the ValueEncoderFunc for MaxKey. +func maxKeyEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tMaxKey { + return ValueEncoderError{Name: "MaxKeyEncodeValue", Types: []reflect.Type{tMaxKey}, Received: val} + } + + return vw.WriteMaxKey() +} + +// coreDocumentEncodeValue is the ValueEncoderFunc for bsoncore.Document. +func coreDocumentEncodeValue(_ EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tCoreDocument { + return ValueEncoderError{Name: "CoreDocumentEncodeValue", Types: []reflect.Type{tCoreDocument}, Received: val} + } + + cdoc := val.Interface().(bsoncore.Document) + + return copyDocumentFromBytes(vw, cdoc) +} + +// codeWithScopeEncodeValue is the ValueEncoderFunc for CodeWithScope. +func codeWithScopeEncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tCodeWithScope { + return ValueEncoderError{Name: "CodeWithScopeEncodeValue", Types: []reflect.Type{tCodeWithScope}, Received: val} + } + + cws := val.Interface().(CodeWithScope) + + dw, err := vw.WriteCodeWithScope(string(cws.Code)) + if err != nil { + return err + } + + sw := sliceWriterPool.Get().(*sliceWriter) + defer sliceWriterPool.Put(sw) + *sw = (*sw)[:0] + + scopeVW := bvwPool.Get().(*valueWriter) + scopeVW.reset(scopeVW.buf[:0]) + scopeVW.w = sw + defer bvwPool.Put(scopeVW) + + encoder, err := ec.LookupEncoder(reflect.TypeOf(cws.Scope)) + if err != nil { + return err + } + + err = encoder.EncodeValue(ec, scopeVW, reflect.ValueOf(cws.Scope)) + if err != nil { + return err + } + + err = copyBytesToDocumentWriter(dw, *sw) + if err != nil { + return err + } + return dw.WriteDocumentEnd() +} + +// isImplementationNil returns if val is a nil pointer and inter is implemented on a concrete type +func isImplementationNil(val reflect.Value, inter reflect.Type) bool { + vt := val.Type() + for vt.Kind() == reflect.Ptr { + vt = vt.Elem() + } + return vt.Implements(inter) && val.Kind() == reflect.Ptr && val.IsNil() +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/doc.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/doc.go new file mode 100644 index 00000000..81aceef2 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/doc.go @@ -0,0 +1,155 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +// Package bson is a library for reading, writing, and manipulating BSON. BSON is a binary serialization format used to +// store documents and make remote procedure calls in MongoDB. The BSON specification is located at https://bsonspec.org. +// The BSON library handles marshaling and unmarshaling of values through a configurable codec system. For a description +// of the codec system and examples of registering custom codecs, see the bsoncodec package. For additional information +// and usage examples, check out the [Work with BSON] page in the Go Driver docs site. +// +// # Raw BSON +// +// The Raw family of types is used to validate and retrieve elements from a slice of bytes. This +// type is most useful when you want do lookups on BSON bytes without unmarshaling it into another +// type. +// +// Example: +// +// var raw bson.Raw = ... // bytes from somewhere +// err := raw.Validate() +// if err != nil { return err } +// val := raw.Lookup("foo") +// i32, ok := val.Int32OK() +// // do something with i32... +// +// # Native Go Types +// +// The D and M types defined in this package can be used to build representations of BSON using native Go types. D is a +// slice and M is a map. For more information about the use cases for these types, see the documentation on the type +// definitions. +// +// Note that a D should not be constructed with duplicate key names, as that can cause undefined server behavior. +// +// Example: +// +// bson.D{{"foo", "bar"}, {"hello", "world"}, {"pi", 3.14159}} +// bson.M{"foo": "bar", "hello": "world", "pi": 3.14159} +// +// When decoding BSON to a D or M, the following type mappings apply when unmarshaling: +// +// 1. BSON int32 unmarshals to an int32. +// 2. BSON int64 unmarshals to an int64. +// 3. BSON double unmarshals to a float64. +// 4. BSON string unmarshals to a string. +// 5. BSON boolean unmarshals to a bool. +// 6. BSON embedded document unmarshals to the parent type (i.e. D for a D, M for an M). +// 7. BSON array unmarshals to a bson.A. +// 8. BSON ObjectId unmarshals to a bson.ObjectID. +// 9. BSON datetime unmarshals to a bson.DateTime. +// 10. BSON binary unmarshals to a bson.Binary. +// 11. BSON regular expression unmarshals to a bson.Regex. +// 12. BSON JavaScript unmarshals to a bson.JavaScript. +// 13. BSON code with scope unmarshals to a bson.CodeWithScope. +// 14. BSON timestamp unmarshals to an bson.Timestamp. +// 15. BSON 128-bit decimal unmarshals to an bson.Decimal128. +// 16. BSON min key unmarshals to an bson.MinKey. +// 17. BSON max key unmarshals to an bson.MaxKey. +// 18. BSON undefined unmarshals to a bson.Undefined. +// 19. BSON null unmarshals to nil. +// 20. BSON DBPointer unmarshals to a bson.DBPointer. +// 21. BSON symbol unmarshals to a bson.Symbol. +// +// The above mappings also apply when marshaling a D or M to BSON. Some other useful marshaling mappings are: +// +// 1. time.Time marshals to a BSON datetime. +// 2. int8, int16, and int32 marshal to a BSON int32. +// 3. int marshals to a BSON int32 if the value is between math.MinInt32 and math.MaxInt32, inclusive, and a BSON int64 +// otherwise. +// 4. int64 marshals to BSON int64 (unless [Encoder.IntMinSize] is set). +// 5. uint8 and uint16 marshal to a BSON int32. +// 6. uint, uint32, and uint64 marshal to a BSON int64 (unless [Encoder.IntMinSize] is set). +// 7. BSON null and undefined values will unmarshal into the zero value of a field (e.g. unmarshaling a BSON null or +// undefined value into a string will yield the empty string.). +// +// # Structs +// +// Structs can be marshaled/unmarshaled to/from BSON or Extended JSON. When transforming structs to/from BSON or Extended +// JSON, the following rules apply: +// +// 1. Only exported fields in structs will be marshaled or unmarshaled. +// +// 2. When marshaling a struct, each field will be lowercased to generate the key for the corresponding BSON element. +// For example, a struct field named "Foo" will generate key "foo". This can be overridden via a struct tag (e.g. +// `bson:"fooField"` to generate key "fooField" instead). +// +// 3. An embedded struct field is marshaled as a subdocument. The key will be the lowercased name of the field's type. +// +// 4. A pointer field is marshaled as the underlying type if the pointer is non-nil. If the pointer is nil, it is +// marshaled as a BSON null value. +// +// 5. When unmarshaling, a field of type interface{} will follow the D/M type mappings listed above. BSON documents +// unmarshaled into an interface{} field will be unmarshaled as a D. +// +// The encoding of each struct field can be customized by the "bson" struct tag. +// +// This tag behavior is configurable, and different struct tag behavior can be configured by initializing a new +// bsoncodec.StructCodec with the desired tag parser and registering that StructCodec onto the Registry. By default, JSON +// tags are not honored, but that can be enabled by creating a StructCodec with JSONFallbackStructTagParser, like below: +// +// Example: +// +// structcodec, _ := bsoncodec.NewStructCodec(bsoncodec.JSONFallbackStructTagParser) +// +// The bson tag gives the name of the field, possibly followed by a comma-separated list of options. +// The name may be empty in order to specify options without overriding the default field name. The following options can +// be used to configure behavior: +// +// 1. omitempty: If the "omitempty" struct tag is specified on a field, the field will not be marshaled if it is set to +// an "empty" value. Numbers, booleans, and strings are considered empty if their value is equal to the zero value for +// the type (i.e. 0 for numbers, false for booleans, and "" for strings). Slices, maps, and arrays are considered +// empty if they are of length zero. Interfaces and pointers are considered empty if their value is nil. By default, +// structs are only considered empty if the struct type implements [bsoncodec.Zeroer] and the IsZero +// method returns true. Struct types that do not implement [bsoncodec.Zeroer] are never considered empty and will be +// marshaled as embedded documents. NOTE: It is recommended that this tag be used for all slice and map fields. +// +// 2. minsize: If the minsize struct tag is specified on a field of type int64, uint, uint32, or uint64 and the value of +// the field can fit in a signed int32, the field will be serialized as a BSON int32 rather than a BSON int64. For +// other types, this tag is ignored. +// +// 3. truncate: If the truncate struct tag is specified on a field with a non-float numeric type, BSON doubles +// unmarshaled into that field will be truncated at the decimal point. For example, if 3.14 is unmarshaled into a +// field of type int, it will be unmarshaled as 3. If this tag is not specified, the decoder will throw an error if +// the value cannot be decoded without losing precision. For float64 or non-numeric types, this tag is ignored. +// +// 4. inline: If the inline struct tag is specified for a struct or map field, the field will be "flattened" when +// marshaling and "un-flattened" when unmarshaling. This means that all of the fields in that struct/map will be +// pulled up one level and will become top-level fields rather than being fields in a nested document. For example, +// if a map field named "Map" with value map[string]interface{}{"foo": "bar"} is inlined, the resulting document will +// be {"foo": "bar"} instead of {"map": {"foo": "bar"}}. There can only be one inlined map field in a struct. If +// there are duplicated fields in the resulting document when an inlined struct is marshaled, the inlined field will +// be overwritten. If there are duplicated fields in the resulting document when an inlined map is marshaled, an +// error will be returned. This tag can be used with fields that are pointers to structs. If an inlined pointer field +// is nil, it will not be marshaled. For fields that are not maps or structs, this tag is ignored. +// +// # Marshaling and Unmarshaling +// +// Manually marshaling and unmarshaling can be done with the Marshal and Unmarshal family of functions. +// +// bsoncodec code provides a system for encoding values to BSON representations and decoding +// values from BSON representations. This package considers both binary BSON and ExtendedJSON as +// BSON representations. The types in this package enable a flexible system for handling this +// encoding and decoding. +// +// The codec system is composed of two parts: +// +// 1) [ValueEncoder] and [ValueDecoder] that handle encoding and decoding Go values to and from BSON +// representations. +// +// 2) A [Registry] that holds these ValueEncoders and ValueDecoders and provides methods for +// retrieving them. +// +// [Work with BSON]: https://www.mongodb.com/docs/drivers/go/current/fundamentals/bson/ +package bson diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/empty_interface_codec.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/empty_interface_codec.go new file mode 100644 index 00000000..80d44d8c --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/empty_interface_codec.go @@ -0,0 +1,127 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "reflect" +) + +// emptyInterfaceCodec is the Codec used for interface{} values. +type emptyInterfaceCodec struct { + // decodeBinaryAsSlice causes DecodeValue to unmarshal BSON binary field values that are the + // "Generic" or "Old" BSON binary subtype as a Go byte slice instead of a Binary. + decodeBinaryAsSlice bool +} + +// Assert that emptyInterfaceCodec satisfies the typeDecoder interface, which allows it +// to be used by collection type decoders (e.g. map, slice, etc) to set individual values in a +// collection. +var _ typeDecoder = &emptyInterfaceCodec{} + +// EncodeValue is the ValueEncoderFunc for interface{}. +func (eic *emptyInterfaceCodec) EncodeValue(ec EncodeContext, vw ValueWriter, val reflect.Value) error { + if !val.IsValid() || val.Type() != tEmpty { + return ValueEncoderError{Name: "EmptyInterfaceEncodeValue", Types: []reflect.Type{tEmpty}, Received: val} + } + + if val.IsNil() { + return vw.WriteNull() + } + encoder, err := ec.LookupEncoder(val.Elem().Type()) + if err != nil { + return err + } + + return encoder.EncodeValue(ec, vw, val.Elem()) +} + +func (eic *emptyInterfaceCodec) getEmptyInterfaceDecodeType(dc DecodeContext, valueType Type) (reflect.Type, error) { + isDocument := valueType == Type(0) || valueType == TypeEmbeddedDocument + if isDocument { + if dc.defaultDocumentType != nil { + // If the bsontype is an embedded document and the DocumentType is set on the DecodeContext, then return + // that type. + return dc.defaultDocumentType, nil + } + } + + rtype, err := dc.LookupTypeMapEntry(valueType) + if err == nil { + return rtype, nil + } + + if isDocument { + // For documents, fallback to looking up a type map entry for Type(0) or TypeEmbeddedDocument, + // depending on the original valueType. + var lookupType Type + switch valueType { + case Type(0): + lookupType = TypeEmbeddedDocument + case TypeEmbeddedDocument: + lookupType = Type(0) + } + + rtype, err = dc.LookupTypeMapEntry(lookupType) + if err == nil { + return rtype, nil + } + // fallback to bson.D + return tD, nil + } + + return nil, err +} + +func (eic *emptyInterfaceCodec) decodeType(dc DecodeContext, vr ValueReader, t reflect.Type) (reflect.Value, error) { + if t != tEmpty { + return emptyValue, ValueDecoderError{Name: "EmptyInterfaceDecodeValue", Types: []reflect.Type{tEmpty}, Received: reflect.Zero(t)} + } + + rtype, err := eic.getEmptyInterfaceDecodeType(dc, vr.Type()) + if err != nil { + switch vr.Type() { + case TypeNull: + return reflect.Zero(t), vr.ReadNull() + default: + return emptyValue, err + } + } + + decoder, err := dc.LookupDecoder(rtype) + if err != nil { + return emptyValue, err + } + + elem, err := decodeTypeOrValueWithInfo(decoder, dc, vr, rtype) + if err != nil { + return emptyValue, err + } + + if (eic.decodeBinaryAsSlice || dc.binaryAsSlice) && rtype == tBinary { + binElem := elem.Interface().(Binary) + if binElem.Subtype == TypeBinaryGeneric || binElem.Subtype == TypeBinaryBinaryOld { + elem = reflect.ValueOf(binElem.Data) + } + } + + return elem, nil +} + +// DecodeValue is the ValueDecoderFunc for interface{}. +func (eic *emptyInterfaceCodec) DecodeValue(dc DecodeContext, vr ValueReader, val reflect.Value) error { + if !val.CanSet() || val.Type() != tEmpty { + return ValueDecoderError{Name: "EmptyInterfaceDecodeValue", Types: []reflect.Type{tEmpty}, Received: val} + } + + elem, err := eic.decodeType(dc, vr, val.Type()) + if err != nil { + return err + } + + val.Set(elem) + return nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/encoder.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/encoder.go new file mode 100644 index 00000000..8299e94f --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/encoder.go @@ -0,0 +1,130 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "reflect" + "sync" +) + +// This pool is used to keep the allocations of Encoders down. This is only used for the Marshal* +// methods and is not consumable from outside of this package. The Encoders retrieved from this pool +// must have both Reset and SetRegistry called on them. +var encPool = sync.Pool{ + New: func() interface{} { + return new(Encoder) + }, +} + +// An Encoder writes a serialization format to an output stream. It writes to a ValueWriter +// as the destination of BSON data. +type Encoder struct { + ec EncodeContext + vw ValueWriter +} + +// NewEncoder returns a new encoder that writes to vw. +func NewEncoder(vw ValueWriter) *Encoder { + return &Encoder{ + ec: EncodeContext{Registry: defaultRegistry}, + vw: vw, + } +} + +// Encode writes the BSON encoding of val to the stream. +// +// See [Marshal] for details about BSON marshaling behavior. +func (e *Encoder) Encode(val interface{}) error { + if marshaler, ok := val.(Marshaler); ok { + // TODO(skriptble): Should we have a MarshalAppender interface so that we can have []byte reuse? + buf, err := marshaler.MarshalBSON() + if err != nil { + return err + } + return copyDocumentFromBytes(e.vw, buf) + } + + encoder, err := e.ec.LookupEncoder(reflect.TypeOf(val)) + if err != nil { + return err + } + + return encoder.EncodeValue(e.ec, e.vw, reflect.ValueOf(val)) +} + +// Reset will reset the state of the Encoder, using the same *EncodeContext used in +// the original construction but using vw. +func (e *Encoder) Reset(vw ValueWriter) { + e.vw = vw +} + +// SetRegistry replaces the current registry of the Encoder with r. +func (e *Encoder) SetRegistry(r *Registry) { + e.ec.Registry = r +} + +// ErrorOnInlineDuplicates causes the Encoder to return an error if there is a duplicate field in +// the marshaled BSON when the "inline" struct tag option is set. +func (e *Encoder) ErrorOnInlineDuplicates() { + e.ec.errorOnInlineDuplicates = true +} + +// IntMinSize causes the Encoder to marshal Go integer values (int, int8, int16, int32, int64, uint, +// uint8, uint16, uint32, or uint64) as the minimum BSON int size (either 32 or 64 bits) that can +// represent the integer value. +func (e *Encoder) IntMinSize() { + e.ec.minSize = true +} + +// StringifyMapKeysWithFmt causes the Encoder to convert Go map keys to BSON document field name +// strings using fmt.Sprint instead of the default string conversion logic. +func (e *Encoder) StringifyMapKeysWithFmt() { + e.ec.stringifyMapKeysWithFmt = true +} + +// NilMapAsEmpty causes the Encoder to marshal nil Go maps as empty BSON documents instead of BSON +// null. +func (e *Encoder) NilMapAsEmpty() { + e.ec.nilMapAsEmpty = true +} + +// NilSliceAsEmpty causes the Encoder to marshal nil Go slices as empty BSON arrays instead of BSON +// null. +func (e *Encoder) NilSliceAsEmpty() { + e.ec.nilSliceAsEmpty = true +} + +// NilByteSliceAsEmpty causes the Encoder to marshal nil Go byte slices as empty BSON binary values +// instead of BSON null. +func (e *Encoder) NilByteSliceAsEmpty() { + e.ec.nilByteSliceAsEmpty = true +} + +// TODO(GODRIVER-2820): Update the description to remove the note about only examining exported +// TODO struct fields once the logic is updated to also inspect private struct fields. + +// OmitZeroStruct causes the Encoder to consider the zero value for a struct (e.g. MyStruct{}) +// as empty and omit it from the marshaled BSON when the "omitempty" struct tag option is set +// or the OmitEmpty() method is called. +// +// Note that the Encoder only examines exported struct fields when determining if a struct is the +// zero value. It considers pointers to a zero struct value (e.g. &MyStruct{}) not empty. +func (e *Encoder) OmitZeroStruct() { + e.ec.omitZeroStruct = true +} + +// OmitEmpty causes the Encoder to omit empty values from the marshaled BSON as the "omitempty" +// struct tag option is set. +func (e *Encoder) OmitEmpty() { + e.ec.omitEmpty = true +} + +// UseJSONStructTags causes the Encoder to fall back to using the "json" struct tag if a "bson" +// struct tag is not specified. +func (e *Encoder) UseJSONStructTags() { + e.ec.useJSONStructTags = true +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_parser.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_parser.go new file mode 100644 index 00000000..a63f23a2 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_parser.go @@ -0,0 +1,804 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "encoding/base64" + "encoding/hex" + "errors" + "fmt" + "io" + "strings" +) + +const maxNestingDepth = 200 + +// ErrInvalidJSON indicates the JSON input is invalid +var ErrInvalidJSON = errors.New("invalid JSON input") + +type jsonParseState byte + +const ( + jpsStartState jsonParseState = iota + jpsSawBeginObject + jpsSawEndObject + jpsSawBeginArray + jpsSawEndArray + jpsSawColon + jpsSawComma + jpsSawKey + jpsSawValue + jpsDoneState + jpsInvalidState +) + +type jsonParseMode byte + +const ( + jpmInvalidMode jsonParseMode = iota + jpmObjectMode + jpmArrayMode +) + +type extJSONValue struct { + t Type + v interface{} +} + +type extJSONObject struct { + keys []string + values []*extJSONValue +} + +type extJSONParser struct { + js *jsonScanner + s jsonParseState + m []jsonParseMode + k string + v *extJSONValue + + err error + canonicalOnly bool + depth int + maxDepth int + + emptyObject bool + relaxedUUID bool +} + +// newExtJSONParser returns a new extended JSON parser, ready to to begin +// parsing from the first character of the argued json input. It will not +// perform any read-ahead and will therefore not report any errors about +// malformed JSON at this point. +func newExtJSONParser(r io.Reader, canonicalOnly bool) *extJSONParser { + return &extJSONParser{ + js: &jsonScanner{r: r}, + s: jpsStartState, + m: []jsonParseMode{}, + canonicalOnly: canonicalOnly, + maxDepth: maxNestingDepth, + } +} + +// peekType examines the next value and returns its BSON Type +func (ejp *extJSONParser) peekType() (Type, error) { + var t Type + var err error + initialState := ejp.s + + ejp.advanceState() + switch ejp.s { + case jpsSawValue: + t = ejp.v.t + case jpsSawBeginArray: + t = TypeArray + case jpsInvalidState: + err = ejp.err + case jpsSawComma: + // in array mode, seeing a comma means we need to progress again to actually observe a type + if ejp.peekMode() == jpmArrayMode { + return ejp.peekType() + } + case jpsSawEndArray: + // this would only be a valid state if we were in array mode, so return end-of-array error + err = ErrEOA + case jpsSawBeginObject: + // peek key to determine type + ejp.advanceState() + switch ejp.s { + case jpsSawEndObject: // empty embedded document + t = TypeEmbeddedDocument + ejp.emptyObject = true + case jpsInvalidState: + err = ejp.err + case jpsSawKey: + if initialState == jpsStartState { + return TypeEmbeddedDocument, nil + } + t = wrapperKeyBSONType(ejp.k) + + // if $uuid is encountered, parse as binary subtype 4 + if ejp.k == "$uuid" { + ejp.relaxedUUID = true + t = TypeBinary + } + + switch t { + case TypeJavaScript: + // just saw $code, need to check for $scope at same level + _, err = ejp.readValue(TypeJavaScript) + if err != nil { + break + } + + switch ejp.s { + case jpsSawEndObject: // type is TypeJavaScript + case jpsSawComma: + ejp.advanceState() + + if ejp.s == jpsSawKey && ejp.k == "$scope" { + t = TypeCodeWithScope + } else { + err = fmt.Errorf("invalid extended JSON: unexpected key %s in CodeWithScope object", ejp.k) + } + case jpsInvalidState: + err = ejp.err + default: + err = ErrInvalidJSON + } + case TypeCodeWithScope: + err = errors.New("invalid extended JSON: code with $scope must contain $code before $scope") + } + } + } + + return t, err +} + +// readKey parses the next key and its type and returns them +func (ejp *extJSONParser) readKey() (string, Type, error) { + if ejp.emptyObject { + ejp.emptyObject = false + return "", 0, ErrEOD + } + + // advance to key (or return with error) + switch ejp.s { + case jpsStartState: + ejp.advanceState() + if ejp.s == jpsSawBeginObject { + ejp.advanceState() + } + case jpsSawBeginObject: + ejp.advanceState() + case jpsSawValue, jpsSawEndObject, jpsSawEndArray: + ejp.advanceState() + switch ejp.s { + case jpsSawBeginObject, jpsSawComma: + ejp.advanceState() + case jpsSawEndObject: + return "", 0, ErrEOD + case jpsDoneState: + return "", 0, io.EOF + case jpsInvalidState: + return "", 0, ejp.err + default: + return "", 0, ErrInvalidJSON + } + case jpsSawKey: // do nothing (key was peeked before) + default: + return "", 0, invalidRequestError("key") + } + + // read key + var key string + + switch ejp.s { + case jpsSawKey: + key = ejp.k + case jpsSawEndObject: + return "", 0, ErrEOD + case jpsInvalidState: + return "", 0, ejp.err + default: + return "", 0, invalidRequestError("key") + } + + // check for colon + ejp.advanceState() + if err := ensureColon(ejp.s, key); err != nil { + return "", 0, err + } + + // peek at the value to determine type + t, err := ejp.peekType() + if err != nil { + return "", 0, err + } + + return key, t, nil +} + +// readValue returns the value corresponding to the Type returned by peekType +func (ejp *extJSONParser) readValue(t Type) (*extJSONValue, error) { + if ejp.s == jpsInvalidState { + return nil, ejp.err + } + + var v *extJSONValue + + switch t { + case TypeNull, TypeBoolean, TypeString: + if ejp.s != jpsSawValue { + return nil, invalidRequestError(t.String()) + } + v = ejp.v + case TypeInt32, TypeInt64, TypeDouble: + // relaxed version allows these to be literal number values + if ejp.s == jpsSawValue { + v = ejp.v + break + } + fallthrough + case TypeDecimal128, TypeSymbol, TypeObjectID, TypeMinKey, TypeMaxKey, TypeUndefined: + switch ejp.s { + case jpsSawKey: + // read colon + ejp.advanceState() + if err := ensureColon(ejp.s, ejp.k); err != nil { + return nil, err + } + + // read value + ejp.advanceState() + if ejp.s != jpsSawValue || !ejp.ensureExtValueType(t) { + return nil, invalidJSONErrorForType("value", t) + } + + v = ejp.v + + // read end object + ejp.advanceState() + if ejp.s != jpsSawEndObject { + return nil, invalidJSONErrorForType("} after value", t) + } + default: + return nil, invalidRequestError(t.String()) + } + case TypeBinary, TypeRegex, TypeTimestamp, TypeDBPointer: + if ejp.s != jpsSawKey { + return nil, invalidRequestError(t.String()) + } + // read colon + ejp.advanceState() + if err := ensureColon(ejp.s, ejp.k); err != nil { + return nil, err + } + + ejp.advanceState() + if t == TypeBinary && ejp.s == jpsSawValue { + // convert relaxed $uuid format + if ejp.relaxedUUID { + defer func() { ejp.relaxedUUID = false }() + uuid, err := ejp.v.parseSymbol() + if err != nil { + return nil, err + } + + // RFC 4122 defines the length of a UUID as 36 and the hyphens in a UUID as appearing + // in the 8th, 13th, 18th, and 23rd characters. + // + // See https://tools.ietf.org/html/rfc4122#section-3 + valid := len(uuid) == 36 && + string(uuid[8]) == "-" && + string(uuid[13]) == "-" && + string(uuid[18]) == "-" && + string(uuid[23]) == "-" + if !valid { + return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding length and hyphens") + } + + // remove hyphens + uuidNoHyphens := strings.ReplaceAll(uuid, "-", "") + if len(uuidNoHyphens) != 32 { + return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding length and hyphens") + } + + // convert hex to bytes + bytes, err := hex.DecodeString(uuidNoHyphens) + if err != nil { + return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding hex bytes: %w", err) + } + + ejp.advanceState() + if ejp.s != jpsSawEndObject { + return nil, invalidJSONErrorForType("$uuid and value and then }", TypeBinary) + } + + base64 := &extJSONValue{ + t: TypeString, + v: base64.StdEncoding.EncodeToString(bytes), + } + subType := &extJSONValue{ + t: TypeString, + v: "04", + } + + v = &extJSONValue{ + t: TypeEmbeddedDocument, + v: &extJSONObject{ + keys: []string{"base64", "subType"}, + values: []*extJSONValue{base64, subType}, + }, + } + + break + } + + // convert legacy $binary format + base64 := ejp.v + + ejp.advanceState() + if ejp.s != jpsSawComma { + return nil, invalidJSONErrorForType(",", TypeBinary) + } + + ejp.advanceState() + key, t, err := ejp.readKey() + if err != nil { + return nil, err + } + if key != "$type" { + return nil, invalidJSONErrorForType("$type", TypeBinary) + } + + subType, err := ejp.readValue(t) + if err != nil { + return nil, err + } + + ejp.advanceState() + if ejp.s != jpsSawEndObject { + return nil, invalidJSONErrorForType("2 key-value pairs and then }", TypeBinary) + } + + v = &extJSONValue{ + t: TypeEmbeddedDocument, + v: &extJSONObject{ + keys: []string{"base64", "subType"}, + values: []*extJSONValue{base64, subType}, + }, + } + break + } + + // read KV pairs + if ejp.s != jpsSawBeginObject { + return nil, invalidJSONErrorForType("{", t) + } + + keys, vals, err := ejp.readObject(2, true) + if err != nil { + return nil, err + } + + ejp.advanceState() + if ejp.s != jpsSawEndObject { + return nil, invalidJSONErrorForType("2 key-value pairs and then }", t) + } + + v = &extJSONValue{t: TypeEmbeddedDocument, v: &extJSONObject{keys: keys, values: vals}} + + case TypeDateTime: + switch ejp.s { + case jpsSawValue: + v = ejp.v + case jpsSawKey: + // read colon + ejp.advanceState() + if err := ensureColon(ejp.s, ejp.k); err != nil { + return nil, err + } + + ejp.advanceState() + switch ejp.s { + case jpsSawBeginObject: + keys, vals, err := ejp.readObject(1, true) + if err != nil { + return nil, err + } + v = &extJSONValue{t: TypeEmbeddedDocument, v: &extJSONObject{keys: keys, values: vals}} + case jpsSawValue: + if ejp.canonicalOnly { + return nil, invalidJSONError("{") + } + v = ejp.v + default: + if ejp.canonicalOnly { + return nil, invalidJSONErrorForType("object", t) + } + return nil, invalidJSONErrorForType("ISO-8601 Internet Date/Time Format as described in RFC-3339", t) + } + + ejp.advanceState() + if ejp.s != jpsSawEndObject { + return nil, invalidJSONErrorForType("value and then }", t) + } + default: + return nil, invalidRequestError(t.String()) + } + case TypeJavaScript: + switch ejp.s { + case jpsSawKey: + // read colon + ejp.advanceState() + if err := ensureColon(ejp.s, ejp.k); err != nil { + return nil, err + } + + // read value + ejp.advanceState() + if ejp.s != jpsSawValue { + return nil, invalidJSONErrorForType("value", t) + } + v = ejp.v + + // read end object or comma and just return + ejp.advanceState() + case jpsSawEndObject: + v = ejp.v + default: + return nil, invalidRequestError(t.String()) + } + case TypeCodeWithScope: + if ejp.s == jpsSawKey && ejp.k == "$scope" { + v = ejp.v // this is the $code string from earlier + + // read colon + ejp.advanceState() + if err := ensureColon(ejp.s, ejp.k); err != nil { + return nil, err + } + + // read { + ejp.advanceState() + if ejp.s != jpsSawBeginObject { + return nil, invalidJSONError("$scope to be embedded document") + } + } else { + return nil, invalidRequestError(t.String()) + } + case TypeEmbeddedDocument, TypeArray: + return nil, invalidRequestError(t.String()) + } + + return v, nil +} + +// readObject is a utility method for reading full objects of known (or expected) size +// it is useful for extended JSON types such as binary, datetime, regex, and timestamp +func (ejp *extJSONParser) readObject(numKeys int, started bool) ([]string, []*extJSONValue, error) { + keys := make([]string, numKeys) + vals := make([]*extJSONValue, numKeys) + + if !started { + ejp.advanceState() + if ejp.s != jpsSawBeginObject { + return nil, nil, invalidJSONError("{") + } + } + + for i := 0; i < numKeys; i++ { + key, t, err := ejp.readKey() + if err != nil { + return nil, nil, err + } + + switch ejp.s { + case jpsSawKey: + v, err := ejp.readValue(t) + if err != nil { + return nil, nil, err + } + + keys[i] = key + vals[i] = v + case jpsSawValue: + keys[i] = key + vals[i] = ejp.v + default: + return nil, nil, invalidJSONError("value") + } + } + + ejp.advanceState() + if ejp.s != jpsSawEndObject { + return nil, nil, invalidJSONError("}") + } + + return keys, vals, nil +} + +// advanceState reads the next JSON token from the scanner and transitions +// from the current state based on that token's type +func (ejp *extJSONParser) advanceState() { + if ejp.s == jpsDoneState || ejp.s == jpsInvalidState { + return + } + + jt, err := ejp.js.nextToken() + + if err != nil { + ejp.err = err + ejp.s = jpsInvalidState + return + } + + valid := ejp.validateToken(jt.t) + if !valid { + ejp.err = unexpectedTokenError(jt) + ejp.s = jpsInvalidState + return + } + + switch jt.t { + case jttBeginObject: + ejp.s = jpsSawBeginObject + ejp.pushMode(jpmObjectMode) + ejp.depth++ + + if ejp.depth > ejp.maxDepth { + ejp.err = nestingDepthError(jt.p, ejp.depth) + ejp.s = jpsInvalidState + } + case jttEndObject: + ejp.s = jpsSawEndObject + ejp.depth-- + + if ejp.popMode() != jpmObjectMode { + ejp.err = unexpectedTokenError(jt) + ejp.s = jpsInvalidState + } + case jttBeginArray: + ejp.s = jpsSawBeginArray + ejp.pushMode(jpmArrayMode) + case jttEndArray: + ejp.s = jpsSawEndArray + + if ejp.popMode() != jpmArrayMode { + ejp.err = unexpectedTokenError(jt) + ejp.s = jpsInvalidState + } + case jttColon: + ejp.s = jpsSawColon + case jttComma: + ejp.s = jpsSawComma + case jttEOF: + ejp.s = jpsDoneState + if len(ejp.m) != 0 { + ejp.err = unexpectedTokenError(jt) + ejp.s = jpsInvalidState + } + case jttString: + switch ejp.s { + case jpsSawComma: + if ejp.peekMode() == jpmArrayMode { + ejp.s = jpsSawValue + ejp.v = extendJSONToken(jt) + return + } + fallthrough + case jpsSawBeginObject: + ejp.s = jpsSawKey + ejp.k = jt.v.(string) + return + } + fallthrough + default: + ejp.s = jpsSawValue + ejp.v = extendJSONToken(jt) + } +} + +var jpsValidTransitionTokens = map[jsonParseState]map[jsonTokenType]bool{ + jpsStartState: { + jttBeginObject: true, + jttBeginArray: true, + jttInt32: true, + jttInt64: true, + jttDouble: true, + jttString: true, + jttBool: true, + jttNull: true, + jttEOF: true, + }, + jpsSawBeginObject: { + jttEndObject: true, + jttString: true, + }, + jpsSawEndObject: { + jttEndObject: true, + jttEndArray: true, + jttComma: true, + jttEOF: true, + }, + jpsSawBeginArray: { + jttBeginObject: true, + jttBeginArray: true, + jttEndArray: true, + jttInt32: true, + jttInt64: true, + jttDouble: true, + jttString: true, + jttBool: true, + jttNull: true, + }, + jpsSawEndArray: { + jttEndObject: true, + jttEndArray: true, + jttComma: true, + jttEOF: true, + }, + jpsSawColon: { + jttBeginObject: true, + jttBeginArray: true, + jttInt32: true, + jttInt64: true, + jttDouble: true, + jttString: true, + jttBool: true, + jttNull: true, + }, + jpsSawComma: { + jttBeginObject: true, + jttBeginArray: true, + jttInt32: true, + jttInt64: true, + jttDouble: true, + jttString: true, + jttBool: true, + jttNull: true, + }, + jpsSawKey: { + jttColon: true, + }, + jpsSawValue: { + jttEndObject: true, + jttEndArray: true, + jttComma: true, + jttEOF: true, + }, + jpsDoneState: {}, + jpsInvalidState: {}, +} + +func (ejp *extJSONParser) validateToken(jtt jsonTokenType) bool { + switch ejp.s { + case jpsSawEndObject: + // if we are at depth zero and the next token is a '{', + // we can consider it valid only if we are not in array mode. + if jtt == jttBeginObject && ejp.depth == 0 { + return ejp.peekMode() != jpmArrayMode + } + case jpsSawComma: + switch ejp.peekMode() { + // the only valid next token after a comma inside a document is a string (a key) + case jpmObjectMode: + return jtt == jttString + case jpmInvalidMode: + return false + } + } + + _, ok := jpsValidTransitionTokens[ejp.s][jtt] + return ok +} + +// ensureExtValueType returns true if the current value has the expected +// value type for single-key extended JSON types. For example, +// {"$numberInt": v} v must be TypeString +func (ejp *extJSONParser) ensureExtValueType(t Type) bool { + switch t { + case TypeMinKey, TypeMaxKey: + return ejp.v.t == TypeInt32 + case TypeUndefined: + return ejp.v.t == TypeBoolean + case TypeInt32, TypeInt64, TypeDouble, TypeDecimal128, TypeSymbol, TypeObjectID: + return ejp.v.t == TypeString + default: + return false + } +} + +func (ejp *extJSONParser) pushMode(m jsonParseMode) { + ejp.m = append(ejp.m, m) +} + +func (ejp *extJSONParser) popMode() jsonParseMode { + l := len(ejp.m) + if l == 0 { + return jpmInvalidMode + } + + m := ejp.m[l-1] + ejp.m = ejp.m[:l-1] + + return m +} + +func (ejp *extJSONParser) peekMode() jsonParseMode { + l := len(ejp.m) + if l == 0 { + return jpmInvalidMode + } + + return ejp.m[l-1] +} + +func extendJSONToken(jt *jsonToken) *extJSONValue { + var t Type + + switch jt.t { + case jttInt32: + t = TypeInt32 + case jttInt64: + t = TypeInt64 + case jttDouble: + t = TypeDouble + case jttString: + t = TypeString + case jttBool: + t = TypeBoolean + case jttNull: + t = TypeNull + default: + return nil + } + + return &extJSONValue{t: t, v: jt.v} +} + +func ensureColon(s jsonParseState, key string) error { + if s != jpsSawColon { + return fmt.Errorf("invalid JSON input: missing colon after key \"%s\"", key) + } + + return nil +} + +func invalidRequestError(s string) error { + return fmt.Errorf("invalid request to read %s", s) +} + +func invalidJSONError(expected string) error { + return fmt.Errorf("invalid JSON input; expected %s", expected) +} + +func invalidJSONErrorForType(expected string, t Type) error { + return fmt.Errorf("invalid JSON input; expected %s for %s", expected, t) +} + +func unexpectedTokenError(jt *jsonToken) error { + switch jt.t { + case jttInt32, jttInt64, jttDouble: + return fmt.Errorf("invalid JSON input; unexpected number (%v) at position %d", jt.v, jt.p) + case jttString: + return fmt.Errorf("invalid JSON input; unexpected string (\"%v\") at position %d", jt.v, jt.p) + case jttBool: + return fmt.Errorf("invalid JSON input; unexpected boolean literal (%v) at position %d", jt.v, jt.p) + case jttNull: + return fmt.Errorf("invalid JSON input; unexpected null literal at position %d", jt.p) + case jttEOF: + return fmt.Errorf("invalid JSON input; unexpected end of input at position %d", jt.p) + default: + return fmt.Errorf("invalid JSON input; unexpected %c at position %d", jt.v.(byte), jt.p) + } +} + +func nestingDepthError(p, depth int) error { + return fmt.Errorf("invalid JSON input; nesting too deep (%d levels) at position %d", depth, p) +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_reader.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_reader.go new file mode 100644 index 00000000..4bee3efc --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_reader.go @@ -0,0 +1,606 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bson + +import ( + "errors" + "fmt" + "io" +) + +type ejvrState struct { + mode mode + vType Type + depth int +} + +// extJSONValueReader is for reading extended JSON. +type extJSONValueReader struct { + p *extJSONParser + + stack []ejvrState + frame int +} + +// NewExtJSONValueReader returns a ValueReader that reads Extended JSON values +// from r. If canonicalOnly is true, reading values from the ValueReader returns +// an error if the Extended JSON was not marshaled in canonical mode. +func NewExtJSONValueReader(r io.Reader, canonicalOnly bool) (ValueReader, error) { + return newExtJSONValueReader(r, canonicalOnly) +} + +func newExtJSONValueReader(r io.Reader, canonicalOnly bool) (*extJSONValueReader, error) { + ejvr := new(extJSONValueReader) + return ejvr.reset(r, canonicalOnly) +} + +func (ejvr *extJSONValueReader) reset(r io.Reader, canonicalOnly bool) (*extJSONValueReader, error) { + p := newExtJSONParser(r, canonicalOnly) + typ, err := p.peekType() + + if err != nil { + return nil, ErrInvalidJSON + } + + var m mode + switch typ { + case TypeEmbeddedDocument: + m = mTopLevel + case TypeArray: + m = mArray + default: + m = mValue + } + + stack := make([]ejvrState, 1, 5) + stack[0] = ejvrState{ + mode: m, + vType: typ, + } + return &extJSONValueReader{ + p: p, + stack: stack, + }, nil +} + +func (ejvr *extJSONValueReader) advanceFrame() { + if ejvr.frame+1 >= len(ejvr.stack) { // We need to grow the stack + length := len(ejvr.stack) + if length+1 >= cap(ejvr.stack) { + // double it + buf := make([]ejvrState, 2*cap(ejvr.stack)+1) + copy(buf, ejvr.stack) + ejvr.stack = buf + } + ejvr.stack = ejvr.stack[:length+1] + } + ejvr.frame++ + + // Clean the stack + ejvr.stack[ejvr.frame].mode = 0 + ejvr.stack[ejvr.frame].vType = 0 + ejvr.stack[ejvr.frame].depth = 0 +} + +func (ejvr *extJSONValueReader) pushDocument() { + ejvr.advanceFrame() + + ejvr.stack[ejvr.frame].mode = mDocument + ejvr.stack[ejvr.frame].depth = ejvr.p.depth +} + +func (ejvr *extJSONValueReader) pushCodeWithScope() { + ejvr.advanceFrame() + + ejvr.stack[ejvr.frame].mode = mCodeWithScope +} + +func (ejvr *extJSONValueReader) pushArray() { + ejvr.advanceFrame() + + ejvr.stack[ejvr.frame].mode = mArray +} + +func (ejvr *extJSONValueReader) push(m mode, t Type) { + ejvr.advanceFrame() + + ejvr.stack[ejvr.frame].mode = m + ejvr.stack[ejvr.frame].vType = t +} + +func (ejvr *extJSONValueReader) pop() { + switch ejvr.stack[ejvr.frame].mode { + case mElement, mValue: + ejvr.frame-- + case mDocument, mArray, mCodeWithScope: + ejvr.frame -= 2 // we pop twice to jump over the vrElement: vrDocument -> vrElement -> vrDocument/TopLevel/etc... + } +} + +func (ejvr *extJSONValueReader) skipObject() { + // read entire object until depth returns to 0 (last ending } or ] seen) + depth := 1 + for depth > 0 { + ejvr.p.advanceState() + + // If object is empty, raise depth and continue. When emptyObject is true, the + // parser has already read both the opening and closing brackets of an empty + // object ("{}"), so the next valid token will be part of the parent document, + // not part of the nested document. + // + // If there is a comma, there are remaining fields, emptyObject must be set back + // to false, and comma must be skipped with advanceState(). + if ejvr.p.emptyObject { + if ejvr.p.s == jpsSawComma { + ejvr.p.emptyObject = false + ejvr.p.advanceState() + } + depth-- + continue + } + + switch ejvr.p.s { + case jpsSawBeginObject, jpsSawBeginArray: + depth++ + case jpsSawEndObject, jpsSawEndArray: + depth-- + } + } +} + +func (ejvr *extJSONValueReader) invalidTransitionErr(destination mode, name string, modes []mode) error { + te := TransitionError{ + name: name, + current: ejvr.stack[ejvr.frame].mode, + destination: destination, + modes: modes, + action: "read", + } + if ejvr.frame != 0 { + te.parent = ejvr.stack[ejvr.frame-1].mode + } + return te +} + +func (ejvr *extJSONValueReader) typeError(t Type) error { + return fmt.Errorf("positioned on %s, but attempted to read %s", ejvr.stack[ejvr.frame].vType, t) +} + +func (ejvr *extJSONValueReader) ensureElementValue(t Type, destination mode, callerName string, addModes ...mode) error { + switch ejvr.stack[ejvr.frame].mode { + case mElement, mValue: + if ejvr.stack[ejvr.frame].vType != t { + return ejvr.typeError(t) + } + default: + modes := []mode{mElement, mValue} + if addModes != nil { + modes = append(modes, addModes...) + } + return ejvr.invalidTransitionErr(destination, callerName, modes) + } + + return nil +} + +func (ejvr *extJSONValueReader) Type() Type { + return ejvr.stack[ejvr.frame].vType +} + +func (ejvr *extJSONValueReader) Skip() error { + switch ejvr.stack[ejvr.frame].mode { + case mElement, mValue: + default: + return ejvr.invalidTransitionErr(0, "Skip", []mode{mElement, mValue}) + } + + defer ejvr.pop() + + t := ejvr.stack[ejvr.frame].vType + switch t { + case TypeArray, TypeEmbeddedDocument, TypeCodeWithScope: + // read entire array, doc or CodeWithScope + ejvr.skipObject() + default: + _, err := ejvr.p.readValue(t) + if err != nil { + return err + } + } + + return nil +} + +func (ejvr *extJSONValueReader) ReadArray() (ArrayReader, error) { + switch ejvr.stack[ejvr.frame].mode { + case mTopLevel: // allow reading array from top level + case mArray: + return ejvr, nil + default: + if err := ejvr.ensureElementValue(TypeArray, mArray, "ReadArray", mTopLevel, mArray); err != nil { + return nil, err + } + } + + ejvr.pushArray() + + return ejvr, nil +} + +func (ejvr *extJSONValueReader) ReadBinary() (b []byte, btype byte, err error) { + if err := ejvr.ensureElementValue(TypeBinary, 0, "ReadBinary"); err != nil { + return nil, 0, err + } + + v, err := ejvr.p.readValue(TypeBinary) + if err != nil { + return nil, 0, err + } + + b, btype, err = v.parseBinary() + + ejvr.pop() + return b, btype, err +} + +func (ejvr *extJSONValueReader) ReadBoolean() (bool, error) { + if err := ejvr.ensureElementValue(TypeBoolean, 0, "ReadBoolean"); err != nil { + return false, err + } + + v, err := ejvr.p.readValue(TypeBoolean) + if err != nil { + return false, err + } + + if v.t != TypeBoolean { + return false, fmt.Errorf("expected type bool, but got type %s", v.t) + } + + ejvr.pop() + return v.v.(bool), nil +} + +func (ejvr *extJSONValueReader) ReadDocument() (DocumentReader, error) { + switch ejvr.stack[ejvr.frame].mode { + case mTopLevel: + return ejvr, nil + case mElement, mValue: + if ejvr.stack[ejvr.frame].vType != TypeEmbeddedDocument { + return nil, ejvr.typeError(TypeEmbeddedDocument) + } + + ejvr.pushDocument() + return ejvr, nil + default: + return nil, ejvr.invalidTransitionErr(mDocument, "ReadDocument", []mode{mTopLevel, mElement, mValue}) + } +} + +func (ejvr *extJSONValueReader) ReadCodeWithScope() (code string, dr DocumentReader, err error) { + if err = ejvr.ensureElementValue(TypeCodeWithScope, 0, "ReadCodeWithScope"); err != nil { + return "", nil, err + } + + v, err := ejvr.p.readValue(TypeCodeWithScope) + if err != nil { + return "", nil, err + } + + code, err = v.parseJavascript() + + ejvr.pushCodeWithScope() + return code, ejvr, err +} + +func (ejvr *extJSONValueReader) ReadDBPointer() (ns string, oid ObjectID, err error) { + if err = ejvr.ensureElementValue(TypeDBPointer, 0, "ReadDBPointer"); err != nil { + return "", NilObjectID, err + } + + v, err := ejvr.p.readValue(TypeDBPointer) + if err != nil { + return "", NilObjectID, err + } + + ns, oid, err = v.parseDBPointer() + + ejvr.pop() + return ns, oid, err +} + +func (ejvr *extJSONValueReader) ReadDateTime() (int64, error) { + if err := ejvr.ensureElementValue(TypeDateTime, 0, "ReadDateTime"); err != nil { + return 0, err + } + + v, err := ejvr.p.readValue(TypeDateTime) + if err != nil { + return 0, err + } + + d, err := v.parseDateTime() + + ejvr.pop() + return d, err +} + +func (ejvr *extJSONValueReader) ReadDecimal128() (Decimal128, error) { + if err := ejvr.ensureElementValue(TypeDecimal128, 0, "ReadDecimal128"); err != nil { + return Decimal128{}, err + } + + v, err := ejvr.p.readValue(TypeDecimal128) + if err != nil { + return Decimal128{}, err + } + + d, err := v.parseDecimal128() + + ejvr.pop() + return d, err +} + +func (ejvr *extJSONValueReader) ReadDouble() (float64, error) { + if err := ejvr.ensureElementValue(TypeDouble, 0, "ReadDouble"); err != nil { + return 0, err + } + + v, err := ejvr.p.readValue(TypeDouble) + if err != nil { + return 0, err + } + + d, err := v.parseDouble() + + ejvr.pop() + return d, err +} + +func (ejvr *extJSONValueReader) ReadInt32() (int32, error) { + if err := ejvr.ensureElementValue(TypeInt32, 0, "ReadInt32"); err != nil { + return 0, err + } + + v, err := ejvr.p.readValue(TypeInt32) + if err != nil { + return 0, err + } + + i, err := v.parseInt32() + + ejvr.pop() + return i, err +} + +func (ejvr *extJSONValueReader) ReadInt64() (int64, error) { + if err := ejvr.ensureElementValue(TypeInt64, 0, "ReadInt64"); err != nil { + return 0, err + } + + v, err := ejvr.p.readValue(TypeInt64) + if err != nil { + return 0, err + } + + i, err := v.parseInt64() + + ejvr.pop() + return i, err +} + +func (ejvr *extJSONValueReader) ReadJavascript() (code string, err error) { + if err = ejvr.ensureElementValue(TypeJavaScript, 0, "ReadJavascript"); err != nil { + return "", err + } + + v, err := ejvr.p.readValue(TypeJavaScript) + if err != nil { + return "", err + } + + code, err = v.parseJavascript() + + ejvr.pop() + return code, err +} + +func (ejvr *extJSONValueReader) ReadMaxKey() error { + if err := ejvr.ensureElementValue(TypeMaxKey, 0, "ReadMaxKey"); err != nil { + return err + } + + v, err := ejvr.p.readValue(TypeMaxKey) + if err != nil { + return err + } + + err = v.parseMinMaxKey("max") + + ejvr.pop() + return err +} + +func (ejvr *extJSONValueReader) ReadMinKey() error { + if err := ejvr.ensureElementValue(TypeMinKey, 0, "ReadMinKey"); err != nil { + return err + } + + v, err := ejvr.p.readValue(TypeMinKey) + if err != nil { + return err + } + + err = v.parseMinMaxKey("min") + + ejvr.pop() + return err +} + +func (ejvr *extJSONValueReader) ReadNull() error { + if err := ejvr.ensureElementValue(TypeNull, 0, "ReadNull"); err != nil { + return err + } + + v, err := ejvr.p.readValue(TypeNull) + if err != nil { + return err + } + + if v.t != TypeNull { + return fmt.Errorf("expected type null but got type %s", v.t) + } + + ejvr.pop() + return nil +} + +func (ejvr *extJSONValueReader) ReadObjectID() (ObjectID, error) { + if err := ejvr.ensureElementValue(TypeObjectID, 0, "ReadObjectID"); err != nil { + return ObjectID{}, err + } + + v, err := ejvr.p.readValue(TypeObjectID) + if err != nil { + return ObjectID{}, err + } + + oid, err := v.parseObjectID() + + ejvr.pop() + return oid, err +} + +func (ejvr *extJSONValueReader) ReadRegex() (pattern string, options string, err error) { + if err = ejvr.ensureElementValue(TypeRegex, 0, "ReadRegex"); err != nil { + return "", "", err + } + + v, err := ejvr.p.readValue(TypeRegex) + if err != nil { + return "", "", err + } + + pattern, options, err = v.parseRegex() + + ejvr.pop() + return pattern, options, err +} + +func (ejvr *extJSONValueReader) ReadString() (string, error) { + if err := ejvr.ensureElementValue(TypeString, 0, "ReadString"); err != nil { + return "", err + } + + v, err := ejvr.p.readValue(TypeString) + if err != nil { + return "", err + } + + if v.t != TypeString { + return "", fmt.Errorf("expected type string but got type %s", v.t) + } + + ejvr.pop() + return v.v.(string), nil +} + +func (ejvr *extJSONValueReader) ReadSymbol() (symbol string, err error) { + if err = ejvr.ensureElementValue(TypeSymbol, 0, "ReadSymbol"); err != nil { + return "", err + } + + v, err := ejvr.p.readValue(TypeSymbol) + if err != nil { + return "", err + } + + symbol, err = v.parseSymbol() + + ejvr.pop() + return symbol, err +} + +func (ejvr *extJSONValueReader) ReadTimestamp() (t uint32, i uint32, err error) { + if err = ejvr.ensureElementValue(TypeTimestamp, 0, "ReadTimestamp"); err != nil { + return 0, 0, err + } + + v, err := ejvr.p.readValue(TypeTimestamp) + if err != nil { + return 0, 0, err + } + + t, i, err = v.parseTimestamp() + + ejvr.pop() + return t, i, err +} + +func (ejvr *extJSONValueReader) ReadUndefined() error { + if err := ejvr.ensureElementValue(TypeUndefined, 0, "ReadUndefined"); err != nil { + return err + } + + v, err := ejvr.p.readValue(TypeUndefined) + if err != nil { + return err + } + + err = v.parseUndefined() + + ejvr.pop() + return err +} + +func (ejvr *extJSONValueReader) ReadElement() (string, ValueReader, error) { + switch ejvr.stack[ejvr.frame].mode { + case mTopLevel, mDocument, mCodeWithScope: + default: + return "", nil, ejvr.invalidTransitionErr(mElement, "ReadElement", []mode{mTopLevel, mDocument, mCodeWithScope}) + } + + name, t, err := ejvr.p.readKey() + + if err != nil { + if errors.Is(err, ErrEOD) { + if ejvr.stack[ejvr.frame].mode == mCodeWithScope { + _, err := ejvr.p.peekType() + if err != nil { + return "", nil, err + } + } + + ejvr.pop() + } + + return "", nil, err + } + + ejvr.push(mElement, t) + return name, ejvr, nil +} + +func (ejvr *extJSONValueReader) ReadValue() (ValueReader, error) { + switch ejvr.stack[ejvr.frame].mode { + case mArray: + default: + return nil, ejvr.invalidTransitionErr(mValue, "ReadValue", []mode{mArray}) + } + + t, err := ejvr.p.peekType() + if err != nil { + if errors.Is(err, ErrEOA) { + ejvr.pop() + } + + return nil, err + } + + ejvr.push(mValue, t) + return ejvr, nil +} diff --git a/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_tables.go b/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_tables.go new file mode 100644 index 00000000..5384db22 --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/v2/bson/extjson_tables.go @@ -0,0 +1,223 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +// +// Based on github.com/golang/go by The Go Authors +// See THIRD-PARTY-NOTICES for original license terms. + +package bson + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). +var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + +// htmlSafeSet holds the value true if the ASCII character with the given +// array position can be safely represented inside a JSON string, embedded +// inside of HTML