Skip to content

Commit aca78d4

Browse files
v1/topdown/graphql: Cache GraphQL schema parse results (open-policy-agent#5377)
This commit stores parsed GraphQL schemas to the cache, which improves the performance of GraphQL operations that parse the schema more than once.

Rough Draft

Complete:
- graphql.schema_is_valid

TODO:
- graphql.is_valid
- graphql.parse
- graphql.parse_and_verify
- graphql.parse_query
- graphql.parse_schema

goarch: amd64
pkg: github.com/open-policy-agent/opa/v1/topdown
cpu: Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz
BenchmarkGraphQLSchemaIsValid/Trivial_Schema_-_string-16 16966 68579 ns/op
BenchmarkGraphQLSchemaIsValid/Trivial_Schema_with_cache_-_string-16 1000000 1062 ns/op
BenchmarkGraphQLSchemaIsValid/Trivial_Schema_-_object-16 12900 92484 ns/op
BenchmarkGraphQLSchemaIsValid/Trivial_Schema_with_cache_-_object-16 92516 12064 ns/op
BenchmarkGraphQLSchemaIsValid/Complex_Schema_-_string-16 60 19294987 ns/op
BenchmarkGraphQLSchemaIsValid/Complex_Schema_with_cache_-_string-16 342 3443940 ns/op
PASS
ok github.com/open-policy-agent/opa/v1/topdown 12.613s

Fixes: open-policy-agent#5377
1 parent f3de100 commit aca78d4

File tree

5 files changed

+955
-18
lines changed

5 files changed

+955
-18
lines changed

Diff for: docs/content/configuration.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -861,14 +861,15 @@ Caching represents the configuration of the inter-query cache that built-in func
861861
functions provided by OPA, `http.send` is currently the only one to utilize the inter-query cache. See the documentation
862862
on the [http.send built-in function](../policy-reference/#http) for information about the available caching options.
863863

864-
It also represents the configuration of the inter-query _value_ cache that built-in functions can utilize. Currently,
864+
It also represents the configuration of the inter-query _value_ cache that built-in functions can utilize. Currently,
865865
this cache is utilized by the `regex` and `glob` built-in functions for compiled regex and glob match patterns
866-
respectively, and the `json.schema_match` built-in function for compiled JSON schemas.
866+
respectively, the `json.schema_match` built-in function for compiled JSON schemas, and the `graphql.schema_is_valid`
867+
function for parsed GraphQL schemas.
867868

868869
| Field | Type | Required | Description |
869870
|--------------------------------------------------------------------------|---------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
870871
| `caching.inter_query_builtin_cache.max_size_bytes` | `int64` | No | Inter-query cache size limit in bytes. OPA will drop old items from the cache if this limit is exceeded. By default, no limit is set. |
871-
| `caching.inter_query_builtin_cache.forced_eviction_threshold_percentage` | `int64` | No | Threshold limit configured as percentage of `caching.inter_query_builtin_cache.max_size_bytes`, when exceeded OPA will start dropping old items permaturely. By default, set to `100`. |
872+
| `caching.inter_query_builtin_cache.forced_eviction_threshold_percentage` | `int64` | No | Threshold limit configured as percentage of `caching.inter_query_builtin_cache.max_size_bytes`, when exceeded OPA will start dropping old items prematurely. By default, set to `100`. |
872873
| `caching.inter_query_builtin_cache.stale_entry_eviction_period_seconds` | `int64` | No | Stale entry eviction period in seconds. OPA will drop expired items from the cache every `stale_entry_eviction_period_seconds`. By default, set to `0` indicating stale entry eviction is disabled. |
873874
| `caching.inter_query_builtin_value_cache.max_num_entries` | `int` | No | Maximum number of entries in the Inter-query value cache. OPA will drop random items from the cache if this limit is exceeded. By default, set to `0` indicating unlimited size. |
874875
| `caching.inter_query_builtin_value_cache.named.io_jwt.max_num_entries` | `int` | No | Maximum number of entries in the `io_jwt` cache, used by the [`io.jwt` token verification](../policy-reference/#tokens) built-in functions. OPA will drop random items from the cache if this limit is exceeded. By default, this cache is disabled. |

Diff for: v1/topdown/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
complex-schema.gql

Diff for: v1/topdown/graphql.go

+78-15
Original file line numberDiff line numberDiff line change
@@ -453,28 +453,91 @@ func builtinGraphQLIsValid(_ BuiltinContext, operands []*ast.Term, iter func(*as
453453
return iter(ast.InternedBooleanTerm(true))
454454
}
455455

456-
func builtinGraphQLSchemaIsValid(_ BuiltinContext, operands []*ast.Term, iter func(*ast.Term) error) error {
457-
var schemaDoc *gqlast.SchemaDocument
456+
func builtinGraphQLSchemaIsValid(bctx BuiltinContext, operands []*ast.Term, iter func(*ast.Term) error) error {
458457
var err error
459458

460-
switch x := operands[0].Value.(type) {
461-
case ast.String:
462-
schemaDoc, err = parseSchema(string(x))
463-
case ast.Object:
464-
schemaDoc, err = objectToSchemaDocument(x)
465-
default:
466-
// Error if wrong type.
467-
return iter(ast.InternedBooleanTerm(false))
468-
}
469-
if err != nil {
470-
return iter(ast.InternedBooleanTerm(false))
459+
// Schemas are only cached if they are valid
460+
schemaCacheKey, schema := cacheGetSchema(bctx, operands)
461+
if schema == nil {
462+
var schemaDoc *gqlast.SchemaDocument
463+
var validatedSchema *gqlast.Schema
464+
465+
switch x := operands[0].Value.(type) {
466+
case ast.String:
467+
schemaDoc, err = parseSchema(string(x))
468+
case ast.Object:
469+
schemaDoc, err = objectToSchemaDocument(x)
470+
default:
471+
// Error if wrong type.
472+
return iter(ast.InternedBooleanTerm(false))
473+
}
474+
if err != nil {
475+
return iter(ast.InternedBooleanTerm(false))
476+
}
477+
// Validate the schema, this determines the result
478+
// and whether there is a schema to cache
479+
validatedSchema, err = convertSchema(schemaDoc)
480+
if bctx.InterQueryBuiltinValueCache != nil && err == nil {
481+
cacheInsertSchema(bctx, schemaCacheKey, validatedSchema)
482+
}
471483
}
472484

473-
// Validate the schema, this determines the result
474-
_, err = convertSchema(schemaDoc)
475485
return iter(ast.InternedBooleanTerm(err == nil))
476486
}
477487

488+
// Insert Schema into cache
489+
func cacheInsertSchema(bctx BuiltinContext, key string, schema *gqlast.Schema) int {
490+
cacheKeyAST := ast.StringTerm(key).Value
491+
numDroppedEntries := bctx.InterQueryBuiltinValueCache.Insert(cacheKeyAST, schema)
492+
return numDroppedEntries
493+
}
494+
495+
// Returns the cache key and a Schema if this key already exists in the cache
496+
func cacheGetSchema(bctx BuiltinContext, operands []*ast.Term) (string, *gqlast.Schema) {
497+
var key string
498+
var schema *gqlast.Schema
499+
500+
if k, keyOk := cacheKeyWithPrefix(bctx, operands, "gql_schema"); keyOk {
501+
key = k
502+
if val, ok := bctx.InterQueryBuiltinValueCache.Get(ast.StringTerm(key).Value); ok {
503+
var isSchema bool
504+
schema, isSchema = val.(*gqlast.Schema)
505+
if !isSchema {
506+
return key, nil
507+
}
508+
}
509+
}
510+
return key, schema
511+
}
512+
513+
// Compute a constant size key for use with the cache
514+
func cacheKeyWithPrefix(bctx BuiltinContext, operands []*ast.Term, prefix string) (string, bool) {
515+
var cacheKey ast.String
516+
var ok bool = false
517+
518+
if bctx.InterQueryBuiltinValueCache != nil {
519+
switch operands[0].Value.(type) {
520+
case ast.String:
521+
err := builtinCryptoSha256(bctx, operands, func(term *ast.Term) error {
522+
cacheKey = term.Value.(ast.String)
523+
return nil
524+
})
525+
ok = (len(cacheKey) > 0) && (err == nil)
526+
case ast.Object:
527+
objTerm := ast.NewTerm(ast.String(operands[0].String()))
528+
err := builtinCryptoSha256(bctx, []*ast.Term{objTerm}, func(term *ast.Term) error {
529+
cacheKey = term.Value.(ast.String)
530+
return nil
531+
})
532+
ok = (len(cacheKey) > 0) && (err == nil)
533+
default:
534+
ok = false
535+
}
536+
}
537+
538+
return prefix + "-" + string(cacheKey), ok
539+
}
540+
478541
func init() {
479542
RegisterBuiltinFunc(ast.GraphQLParse.Name, builtinGraphQLParse)
480543
RegisterBuiltinFunc(ast.GraphQLParseAndVerify.Name, builtinGraphQLParseAndVerify)

Diff for: v1/topdown/graphql_bench_test.go

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Copyright 2025 The OPA Authors. All rights reserved.
2+
// Use of this source code is governed by an Apache2
3+
// license that can be found in the LICENSE file.
4+
5+
package topdown
6+
7+
import (
8+
"context"
9+
_ "embed"
10+
"testing"
11+
12+
"github.com/open-policy-agent/opa/v1/ast"
13+
"github.com/open-policy-agent/opa/v1/topdown/cache"
14+
)
15+
16+
// The "Trivial Schema" benchmarks use employeeGQLSchema, a simple schema defined in graphql_test.go.
17+
//
18+
// If you don't have your own complex GQL schema you can use the
19+
// [GitHub GraphQL Schema](https://docs.github.com/en/graphql) for more realistic performance testing
20+
//
21+
// Download the schema with curl -o complex-schema.gql -L https://docs.github.com/public/fpt/schema.docs.graphql
22+
// and use embed so we don't add an extra 2+ MB to the source tree.
23+
//
24+
//go:embed complex-schema.gql
25+
var complexGQLSchema string
26+
27+
// graphqurl --introspect https://api.github.com/graphql -H "Authorization: bearer $GITHUB_TOKEN" --format=json > complex-schema.json
28+
//
29+
//go:embed complex-schema.json
30+
var complexGQLSchemaJSON string
31+
32+
func BenchmarkGraphQLSchemaIsValid(b *testing.B) {
33+
34+
// Share an InterQueryValueCache across multiple runs
35+
// Tune number of entries to exceed number of distinct GQL schemas
36+
in := `{"inter_query_builtin_value_cache": {"max_num_entries": 10},}`
37+
config, _ := cache.ParseCachingConfig([]byte(in))
38+
valueCache := cache.NewInterQueryValueCache(context.Background(), config)
39+
40+
benches := []struct {
41+
desc string
42+
schema *ast.Term
43+
cache cache.InterQueryValueCache
44+
}{
45+
{"Trivial Schema - string", ast.NewTerm(ast.String(employeeGQLSchema)), nil},
46+
{"Trivial Schema with cache - string", ast.NewTerm(ast.String(employeeGQLSchema)), valueCache},
47+
{"Trivial Schema - object", ast.NewTerm(ast.MustParseTerm(employeeGQLSchemaAST).Value.(ast.Object)), nil},
48+
{"Trivial Schema with cache - object", ast.NewTerm(ast.MustParseTerm(employeeGQLSchemaAST).Value.(ast.Object)), valueCache},
49+
{"Trivial Schema - AST JSON string", ast.NewTerm(ast.String(employeeGQLSchemaAST)), nil},
50+
{"Trivial Schema with cache - AST JSON string", ast.NewTerm(ast.String(employeeGQLSchemaAST)), valueCache},
51+
{"Complex Schema - string", ast.NewTerm(ast.String(complexGQLSchema)), nil},
52+
{"Complex Schema with cache - string", ast.NewTerm(ast.String(complexGQLSchema)), valueCache},
53+
{"Complex Schema - JSON string", ast.NewTerm(ast.String(complexGQLSchemaJSON)), nil},
54+
{"Complex Schema with cache - JSON string", ast.NewTerm(ast.String(complexGQLSchemaJSON)), valueCache},
55+
}
56+
57+
for _, bench := range benches {
58+
b.Run(bench.desc, func(b *testing.B) {
59+
for range b.N {
60+
err := builtinGraphQLSchemaIsValid(
61+
BuiltinContext{
62+
InterQueryBuiltinValueCache: bench.cache,
63+
},
64+
[]*ast.Term{bench.schema},
65+
func(term *ast.Term) error {
66+
return nil
67+
},
68+
)
69+
70+
if err != nil {
71+
b.Fatalf("unexpected error: %s", err)
72+
}
73+
}
74+
})
75+
}
76+
}

0 commit comments

Comments
 (0)