Skip to content

Commit 2e70ac7

Browse files
committed
pkg/list: reimplement sort
The reimplemenation replaces uses the high-level API usage with using the low-level, making use of the fact that each Vertex used for comparison uses the same outline and the same conjuncts for "less". This significantly reduces the number of allocations and other redundant work. Before: Benchmark/sort.txtar-12 5 202090158 ns/op After: Benchmark/sort.txtar-12 28 43326277 ns/op Which is about an 80% reduction in running time. It is possible to reduce runtime further if the type indicated for x and y are idempotent. There are several strategies to analyze this. A TODO has been added for this. This change achieves the lion share of possible performance improvements for Sort. We leave it open, though, to verify the result with the various stakeholders. Issue #745 Signed-off-by: Marcel van Lohuizen <[email protected]> Change-Id: Idd6ac1925f5dd786313ea450218d4d17eb590581 Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/549087 Reviewed-by: Daniel Martí <[email protected]> Reviewed-by: Roger Peppe <[email protected]> Unity-Result: CUEcueckoo <[email protected]> TryBot-Result: CUEcueckoo <[email protected]>
1 parent 5d0a6f2 commit 2e70ac7

File tree

2 files changed

+98
-9
lines changed

2 files changed

+98
-9
lines changed

Diff for: pkg/list/sort.go

+96-9
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,20 @@ import (
2222
"sort"
2323

2424
"cuelang.org/go/cue"
25+
"cuelang.org/go/internal/core/adt"
26+
"cuelang.org/go/internal/types"
2527
)
2628

2729
// valueSorter defines a sort.Interface; implemented in cue/builtinutil.go.
2830
type valueSorter struct {
31+
ctx *adt.OpContext
2932
a []cue.Value
30-
cmp cue.Value
3133
err error
34+
35+
cmp *adt.Vertex
36+
less *adt.Vertex
37+
x *adt.Vertex
38+
y *adt.Vertex
3239
}
3340

3441
func (s *valueSorter) ret() ([]cue.Value, error) {
@@ -42,16 +49,89 @@ func (s *valueSorter) ret() ([]cue.Value, error) {
4249
func (s *valueSorter) Len() int { return len(s.a) }
4350
func (s *valueSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
4451
func (s *valueSorter) Less(i, j int) bool {
45-
v := s.cmp.Fill(s.a[i], "x")
46-
v = v.Fill(s.a[j], "y")
47-
isLess, err := v.Lookup("less").Bool()
48-
if err != nil && s.err == nil {
49-
s.err = err
52+
if s.err != nil {
53+
return false
54+
}
55+
var x, y types.Value
56+
s.a[i].Core(&x)
57+
s.a[j].Core(&y)
58+
59+
// Save the state of all relevant arcs and restore later for the
60+
// next comparison.
61+
saveCmp := *s.cmp
62+
saveLess := *s.less
63+
saveX := *s.x
64+
saveY := *s.y
65+
66+
for _, c := range x.V.Conjuncts {
67+
s.x.AddConjunct(c)
68+
}
69+
for _, c := range y.V.Conjuncts {
70+
s.y.AddConjunct(c)
71+
}
72+
73+
// TODO(perf): if we can determine that the comparator values for
74+
// x and y are idempotent (no arcs and a basevalue being top or
75+
// a struct or list marker), then we do not need to reevaluate the input.
76+
// In that case, we can use the below code instead of the above two loops
77+
// setting the conjuncts. This may improve performance significantly.
78+
//
79+
// s.x.BaseValue = x.V.BaseValue
80+
// s.x.Arcs = x.V.Arcs
81+
// s.y.BaseValue = y.V.BaseValue
82+
// s.y.Arcs = y.V.Arcs
83+
84+
s.less.Finalize(s.ctx)
85+
isLess := s.ctx.BoolValue(s.less)
86+
if b := s.less.Err(s.ctx, adt.Finalized); b != nil && s.err == nil {
87+
s.err = b.Err
5088
return true
5189
}
90+
91+
*s.less = saveLess
92+
*s.cmp = saveCmp
93+
*s.x = saveX
94+
*s.y = saveY
95+
5296
return isLess
5397
}
5498

99+
var less = cue.ParsePath("less")
100+
101+
func makeValueSorter(list []cue.Value, cmp cue.Value) (s valueSorter) {
102+
if v := cmp.LookupPath(less); !v.Exists() {
103+
return valueSorter{err: v.Err()}
104+
}
105+
106+
var v types.Value
107+
cmp.Core(&v)
108+
ctx := adt.NewContext(v.R, v.V)
109+
110+
n := &adt.Vertex{
111+
Label: v.V.Label,
112+
Parent: v.V.Parent,
113+
Conjuncts: v.V.Conjuncts,
114+
}
115+
ctx.Unify(n, adt.Conjuncts)
116+
117+
s = valueSorter{
118+
a: list,
119+
ctx: ctx,
120+
cmp: n,
121+
less: getArc(ctx, n, "less"),
122+
x: getArc(ctx, n, "x"),
123+
y: getArc(ctx, n, "y"),
124+
}
125+
126+
// TODO(perf): see comment in the Less method. If we can determine
127+
// the conjuncts for x and y are idempotent, we can pre finalize here and
128+
// ignore the values in the Less method.
129+
// s.x.UpdateStatus(adt.Finalized)
130+
// s.y.UpdateStatus(adt.Finalized)
131+
132+
return s
133+
}
134+
55135
// Sort sorts data while keeping the original order of equal elements.
56136
// It does O(n*log(n)) comparisons.
57137
//
@@ -64,15 +144,22 @@ func (s *valueSorter) Less(i, j int) bool {
64144
//
65145
// Sort([{a: 2}, {a: 3}, {a: 1}], {x: {}, y: {}, less: x.a < y.a})
66146
func Sort(list []cue.Value, cmp cue.Value) (sorted []cue.Value, err error) {
67-
s := valueSorter{list, cmp, nil}
147+
s := makeValueSorter(list, cmp)
148+
68149
// The input slice is already a copy and that we can modify it safely.
69150
sort.Stable(&s)
70151
return s.ret()
71152
}
72153

154+
func getArc(ctx *adt.OpContext, v *adt.Vertex, s string) *adt.Vertex {
155+
f := ctx.StringLabel(s)
156+
arc, _ := v.GetArc(ctx, f, 0)
157+
return arc
158+
}
159+
73160
// Deprecated: use Sort, which is always stable
74161
func SortStable(list []cue.Value, cmp cue.Value) (sorted []cue.Value, err error) {
75-
s := valueSorter{list, cmp, nil}
162+
s := makeValueSorter(list, cmp)
76163
sort.Stable(&s)
77164
return s.ret()
78165
}
@@ -87,7 +174,7 @@ func SortStrings(a []string) []string {
87174
//
88175
// See Sort for an example comparator.
89176
func IsSorted(list []cue.Value, cmp cue.Value) bool {
90-
s := valueSorter{list, cmp, nil}
177+
s := makeValueSorter(list, cmp)
91178
return sort.IsSorted(&s)
92179
}
93180

Diff for: pkg/list/testdata/gen.txtar

+2
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ t36: error in call to list.Slice: slice bounds out of range:
100100
./in.cue:38:6
101101
t40: error in call to list.Sort: 2 errors in empty disjunction::
102102
./in.cue:46:6
103+
list:13:17
103104
t42: invalid list element 0 in argument 0 to call: cannot use value 1 (int) as string:
104105
./in.cue:48:6
105106
./in.cue:48:24
@@ -109,6 +110,7 @@ t49: error in call to list.Take: negative index:
109110
./in.cue:55:6
110111
t54: error in call to list.Sort: 2 errors in empty disjunction::
111112
./in.cue:60:6
113+
list:13:17
112114

113115
Result:
114116
t1: 2.5

0 commit comments

Comments
 (0)