From be376698084e0fc140c3c13781b954abed674ff7 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 17 Dec 2024 09:31:39 +0100 Subject: [PATCH] Add linear cost dedupe --- go.mod | 1 + go.sum | 2 + policy/policy.go | 31 +++++++++ policy/policy_test.go | 153 ++++++++++++++++++++++++++++++++++++++++-- policy/statement.go | 40 +++++++++++ 5 files changed, 223 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index e765b52..b6c3af1 100644 --- a/go.mod +++ b/go.mod @@ -65,6 +65,7 @@ require ( github.com/tklauser/go-sysconf v0.3.14 // indirect github.com/tklauser/numcpus v0.8.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect go.etcd.io/etcd/api/v3 v3.5.16 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.16 // indirect go.uber.org/multierr v1.11.0 // indirect diff --git a/go.sum b/go.sum index 61b63a6..2045d00 100644 --- a/go.sum +++ b/go.sum @@ -170,6 +170,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/etcd/api/v3 v3.5.16 h1:WvmyJVbjWqK4R1E+B12RRHz3bRGy9XVfh++MgbN+6n0= go.etcd.io/etcd/api/v3 v3.5.16/go.mod h1:1P4SlIP/VwkDmGo3OlOD7faPeP8KDIFhqvciH5EfN28= go.etcd.io/etcd/client/pkg/v3 v3.5.16 h1:ZgY48uH6UvB+/7R9Yf4x574uCO3jIx0TRDyetSfId3Q= diff --git a/policy/policy.go b/policy/policy.go index 0b357c9..634b352 100644 --- a/policy/policy.go +++ b/policy/policy.go @@ -18,6 +18,8 @@ package policy import ( + "crypto/rand" + "encoding/binary" "encoding/json" "io" "strings" @@ -242,6 +244,11 @@ func MergePolicies(inputs ...Policy) Policy { } func (iamp *Policy) dropDuplicateStatements() { + // Select an O(N) version instead of O(N!) for more statements. + if len(iamp.Statements) > 10 { + iamp.dropDuplicateStatementsMany() + return + } dups := make(map[int]struct{}) for i := range iamp.Statements { if _, ok := dups[i]; ok { @@ -271,6 +278,30 @@ func (iamp *Policy) dropDuplicateStatements() { iamp.Statements = iamp.Statements[:c] } +func (iamp *Policy) dropDuplicateStatementsMany() { + // Calculate a hash for each. + // Drop statements with duplicate hashes. + found := make(map[[16]byte]struct{}, len(iamp.Statements)) + + // Apply a base seed + var baseSeed [8]byte + rand.Read(baseSeed[:]) + var seed uint64 + binary.LittleEndian.PutUint64(baseSeed[:], seed) + writeAt := 0 + for _, s := range iamp.Statements { + h := s.hash(seed) + if _, ok := found[h]; ok { + // duplicate, do not write. + continue + } + found[h] = struct{}{} + iamp.Statements[writeAt] = s + writeAt++ + } + iamp.Statements = iamp.Statements[:writeAt] +} + // UnmarshalJSON - decodes JSON data to Iamp. func (iamp *Policy) UnmarshalJSON(data []byte) error { // subtype to avoid recursive call to UnmarshalJSON() diff --git a/policy/policy_test.go b/policy/policy_test.go index 73651f2..08f6fe8 100644 --- a/policy/policy_test.go +++ b/policy/policy_test.go @@ -21,6 +21,7 @@ import ( "bytes" "encoding/json" "net" + "strconv" "strings" "testing" "time" @@ -1543,11 +1544,155 @@ func TestMergePolicies(t *testing.T) { }, }, }, + { + inputs: []Policy{p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3}, + expected: Policy{ + Version: DefaultVersion, + Statements: []Statement{ + NewStatement( + "", + Deny, + NewActionSet(AllAdminActions), + ResourceSet{}, + condition.NewFunctions(), + ), + NewStatement( + "", + Allow, + NewActionSet(AllActions), + NewResourceSet(NewResource("*")), + condition.NewFunctions(), + ), + NewStatement( + "", + Allow, + NewActionSet(GetBucketLocationAction), + NewResourceSet(NewResource("mybucket")), + condition.NewFunctions(), + ), + }, + }, + }, } for i, testCase := range testCases { - got := MergePolicies(testCase.inputs...) - if !got.Equals(testCase.expected) { - t.Errorf("Case %d: expected: %v, got %v", i+1, got, testCase.expected) - } + t.Run(strconv.Itoa(i), func(t *testing.T) { + got := MergePolicies(testCase.inputs...) + if !got.Equals(testCase.expected) { + t.Errorf("Case %d: expected: %v, got %v", i, testCase.expected, got) + } + }) + } +} + +func BenchmarkDedupe(b *testing.B) { + var allActions []Action + var allAdminActions []Action + for action := range supportedActions { + allActions = append(allActions, action) + } + for action := range supportedAdminActions { + allAdminActions = append(allAdminActions, Action(action)) + } + + p1 := Policy{ + Version: DefaultVersion, + Statements: []Statement{ + NewStatement( + "", + Deny, + NewActionSet(allAdminActions...), + NewResourceSet(NewResource("bucket0"), NewResource("bucket1"), NewResource("bucket2"), NewResource("bucket3"), NewResource("bucket4"), NewResource("bucket5")), + condition.NewFunctions(), + ), + NewStatement( + "", + Allow, + NewActionSet(allActions...), + NewResourceSet(NewResource("bucket0"), NewResource("bucket1"), NewResource("bucket2"), NewResource("bucket3"), NewResource("bucket4"), NewResource("bucket5")), + condition.NewFunctions(), + ), + }, + } + + // p2 is a subset of p1 + p2 := Policy{ + Version: DefaultVersion, + Statements: []Statement{ + NewStatement( + "", + Deny, + NewActionSet(allAdminActions...), + NewResourceSet(NewResource("bucket0"), NewResource("bucket1"), NewResource("bucket2"), NewResource("bucket3"), NewResource("bucket4"), NewResource("bucket5")), + condition.NewFunctions(), + ), + }, + } + + p3 := Policy{ + ID: "MyPolicyForMyBucket1", + Version: DefaultVersion, + Statements: []Statement{ + NewStatement( + "", + Allow, + NewActionSet(allActions...), + NewResourceSet(NewResource("mybucketA"), NewResource("mybucketB"), NewResource("mybucketC"), NewResource("mybucketD"), NewResource("mybucketE"), NewResource("mybucketF"), NewResource("mybucketG"), NewResource("mybucketH"), NewResource("mybucketI"), NewResource("mybucketJ"), NewResource("mybucketK"), NewResource("mybucketL"), NewResource("mybucketM"), NewResource("mybucketN"), NewResource("mybucketO"), NewResource("mybucketP"), NewResource("mybucketQ"), NewResource("mybucketR"), NewResource("mybucketS"), NewResource("mybucketS"), NewResource("mybucketU"), NewResource("mybucketV"), NewResource("mybucketX")), + condition.NewFunctions(), + ), + }, + } + + testCases := []struct { + inputs []Policy + expected Policy + }{ + { + inputs: []Policy{p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3}, + expected: Policy{ + Version: DefaultVersion, + Statements: []Statement{ + NewStatement( + "", + Deny, + NewActionSet(allAdminActions...), + NewResourceSet(NewResource("bucket0"), NewResource("bucket1"), NewResource("bucket2"), NewResource("bucket3"), NewResource("bucket4"), NewResource("bucket5")), + condition.NewFunctions(), + ), + NewStatement( + "", + Allow, + NewActionSet(allActions...), + NewResourceSet(NewResource("bucket0"), NewResource("bucket1"), NewResource("bucket2"), NewResource("bucket3"), NewResource("bucket4"), NewResource("bucket5")), + condition.NewFunctions(), + ), + NewStatement( + "", + Allow, + NewActionSet(allActions...), + NewResourceSet(NewResource("mybucketA"), NewResource("mybucketB"), NewResource("mybucketC"), NewResource("mybucketD"), NewResource("mybucketE"), NewResource("mybucketF"), NewResource("mybucketG"), NewResource("mybucketH"), NewResource("mybucketI"), NewResource("mybucketJ"), NewResource("mybucketK"), NewResource("mybucketL"), NewResource("mybucketM"), NewResource("mybucketN"), NewResource("mybucketO"), NewResource("mybucketP"), NewResource("mybucketQ"), NewResource("mybucketR"), NewResource("mybucketS"), NewResource("mybucketS"), NewResource("mybucketU"), NewResource("mybucketV"), NewResource("mybucketX")), + condition.NewFunctions(), + ), + }, + }, + }, + } + for i, testCase := range testCases { + b.Run(strconv.Itoa(i), func(b *testing.B) { + var merged Policy + for _, p := range testCase.inputs { + if merged.Version == "" { + merged.Version = p.Version + } + for _, st := range p.Statements { + merged.Statements = append(merged.Statements, st.Clone()) + } + } + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + shallow := merged + shallow.dropDuplicateStatements() + } + }) } } diff --git a/policy/statement.go b/policy/statement.go index b594ac5..4897d94 100644 --- a/policy/statement.go +++ b/policy/statement.go @@ -18,9 +18,11 @@ package policy import ( + "encoding/binary" "strings" "github.com/minio/pkg/v3/policy/condition" + "github.com/zeebo/xxh3" ) // Statement - iam policy statement. @@ -203,6 +205,44 @@ func (statement Statement) Equals(st Statement) bool { return true } +// Equals checks if two statements are equal +func (statement Statement) hash(seed uint64) [16]byte { + // Order independent xor. + xorTo := func(dst *xxh3.Uint128, v xxh3.Uint128) { + dst.Lo ^= v.Lo + dst.Hi ^= v.Hi + } + // Add value with seed. + xorInt := func(dst *xxh3.Uint128, n int, seed uint64) { + var tmp [8]byte + binary.LittleEndian.PutUint64(tmp[:], uint64(n)) + xorTo(dst, xxh3.Hash128Seed(tmp[:], seed)) + } + + h := xxh3.HashString128Seed(string(statement.Effect), seed) + + xorInt(&h, len(statement.Actions), seed+1) + for action := range statement.Actions { + xorTo(&h, xxh3.HashString128Seed(string(action), seed+2)) + } + + xorInt(&h, len(statement.NotActions), seed+3) + for action := range statement.NotActions { + xorTo(&h, xxh3.HashString128Seed(string(action), seed+4)) + } + + xorInt(&h, len(statement.Resources), seed+5) + for res := range statement.Resources { + xorTo(&h, xxh3.HashString128Seed(res.Pattern+res.Type.String(), seed+6)) + } + + xorInt(&h, len(statement.Conditions), seed+7) + for _, cond := range statement.Conditions { + xorTo(&h, xxh3.HashString128Seed(cond.String(), seed+8)) + } + return h.Bytes() +} + // Clone clones Statement structure func (statement Statement) Clone() Statement { return Statement{