Skip to content

Commit 0e46f6f

Browse files
committed
internal/encoding/yaml: encode YAML anchors as CUE definitions
This commit supports encoding YAML documents such as `a: &a 3`, `b: *a` to this CUE document: `#a: 3`, `a: #a`, `b: #a`. Fixes #3818 Signed-off-by: Omri Steiner <[email protected]>
1 parent ee299f0 commit 0e46f6f

File tree

3 files changed

+185
-41
lines changed

3 files changed

+185
-41
lines changed

internal/encoding/yaml/decode.go

Lines changed: 122 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ type decoder struct {
6464

6565
// forceNewline ensures that the next position will be on a new line.
6666
forceNewline bool
67+
68+
// anchorFields contains the anchors that are gathered as we walk the YAML nodes.
69+
// These are only added to the AST once the whole document has been processed.
70+
anchorFields []ast.Field
71+
// anchorNames maps anchor nodes to their names.
72+
anchorNames map[*yaml.Node]string
73+
// anchorTakenNames keeps track of anchor names that have been taken.
74+
// It is used to ensure unique anchor names.
75+
anchorTakenNames map[string]struct{}
6776
}
6877

6978
// TODO(mvdan): this can be io.Reader really, except that token.Pos is offset-based,
@@ -83,9 +92,11 @@ func NewDecoder(filename string, b []byte) *decoder {
8392
tokFile := token.NewFile(filename, 0, len(b)+1)
8493
tokFile.SetLinesForContent(b)
8594
return &decoder{
86-
tokFile: tokFile,
87-
tokLines: append(tokFile.Lines(), len(b)),
88-
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
95+
tokFile: tokFile,
96+
tokLines: append(tokFile.Lines(), len(b)),
97+
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
98+
anchorNames: make(map[*yaml.Node]string),
99+
anchorTakenNames: make(map[string]struct{}),
89100
}
90101
}
91102

@@ -176,24 +187,35 @@ func Unmarshal(filename string, data []byte) (ast.Expr, error) {
176187
return n, nil
177188
}
178189

179-
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
180-
d.addHeadCommentsToPending(yn)
181-
var expr ast.Expr
182-
var err error
190+
func (d *decoder) extractNoAnchor(yn *yaml.Node) (ast.Expr, error) {
183191
switch yn.Kind {
184192
case yaml.DocumentNode:
185-
expr, err = d.document(yn)
193+
return d.document(yn)
186194
case yaml.SequenceNode:
187-
expr, err = d.sequence(yn)
195+
return d.sequence(yn)
188196
case yaml.MappingNode:
189-
expr, err = d.mapping(yn)
197+
return d.mapping(yn)
190198
case yaml.ScalarNode:
191-
expr, err = d.scalar(yn)
199+
return d.scalar(yn)
192200
case yaml.AliasNode:
193-
expr, err = d.alias(yn)
201+
return d.referenceAlias(yn)
194202
default:
195203
return nil, d.posErrorf(yn, "unknown yaml node kind: %d", yn.Kind)
196204
}
205+
}
206+
207+
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
208+
d.addHeadCommentsToPending(yn)
209+
210+
var expr ast.Expr
211+
var err error
212+
213+
if yn.Anchor == "" {
214+
expr, err = d.extractNoAnchor(yn)
215+
} else {
216+
expr, err = d.anchor(yn)
217+
}
218+
197219
if err != nil {
198220
return nil, err
199221
}
@@ -324,7 +346,41 @@ func (d *decoder) document(yn *yaml.Node) (ast.Expr, error) {
324346
if n := len(yn.Content); n != 1 {
325347
return nil, d.posErrorf(yn, "yaml document nodes are meant to have one content node but have %d", n)
326348
}
327-
return d.extract(yn.Content[0])
349+
350+
expr, err := d.extract(yn.Content[0])
351+
if err != nil {
352+
return nil, err
353+
}
354+
355+
return d.addAnchorNodes(expr)
356+
}
357+
358+
// addAnchorNodes prepends anchor nodes at the top of the document.
359+
func (d *decoder) addAnchorNodes(expr ast.Expr) (ast.Expr, error) {
360+
elements := []ast.Decl{}
361+
362+
for _, field := range d.anchorFields {
363+
elements = append(elements, &field)
364+
}
365+
366+
switch x := expr.(type) {
367+
case *ast.StructLit:
368+
x.Elts = append(elements, x.Elts...)
369+
break
370+
case *ast.ListLit:
371+
if len(elements) > 0 {
372+
expr = &ast.StructLit{
373+
Elts: append(elements, x),
374+
}
375+
}
376+
break
377+
default:
378+
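// If the whole YAML document is not a map / seq, it is assumed not to have anchors.
379+
// TODO(review): maybe assert that `anchorFields` is empty? A top-level scalar can itself carry an anchor (e.g. `&a 3`), in which case the gathered fields are dropped here.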
380+
break
381+
}
382+
383+
return expr, nil
328384
}
329385

330386
func (d *decoder) sequence(yn *yaml.Node) (ast.Expr, error) {
@@ -458,7 +514,7 @@ func (d *decoder) label(yn *yaml.Node) (ast.Label, error) {
458514
if yn.Alias.Kind != yaml.ScalarNode {
459515
return nil, d.posErrorf(yn, "invalid map key: %v", yn.Alias.ShortTag())
460516
}
461-
expr, err = d.alias(yn)
517+
expr, err = d.inlineAlias(yn)
462518
value = yn.Alias.Value
463519
default:
464520
return nil, d.posErrorf(yn, "invalid map key: %v", yn.ShortTag())
@@ -639,7 +695,10 @@ func (d *decoder) makeNum(yn *yaml.Node, val string, kind token.Token) (expr ast
639695
return expr
640696
}
641697

642-
func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
698+
// inlineAlias expands an alias node in place, returning the expanded node.
699+
// Sometimes we have to resort to this, for example when the alias
700+
// is inside a map key, since CUE does not support structs as map keys.
701+
func (d *decoder) inlineAlias(yn *yaml.Node) (ast.Expr, error) {
643702
if d.extractingAliases[yn] {
644703
// TODO this could actually be allowed in some circumstances.
645704
return nil, d.posErrorf(yn, "anchor %q value contains itself", yn.Value)
@@ -649,11 +708,58 @@ func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
649708
}
650709
d.extractingAliases[yn] = true
651710
var node ast.Expr
652-
node, err := d.extract(yn.Alias)
711+
node, err := d.extractNoAnchor(yn.Alias)
653712
delete(d.extractingAliases, yn)
654713
return node, err
655714
}
656715

716+
// referenceAlias replaces an alias with a reference to the identifier of its anchor.
717+
func (d *decoder) referenceAlias(yn *yaml.Node) (ast.Expr, error) {
718+
anchor, ok := d.anchorNames[yn.Alias]
719+
if !ok {
720+
return nil, d.posErrorf(yn, "anchor %q not found", yn.Alias.Anchor)
721+
}
722+
723+
return &ast.Ident{
724+
NamePos: d.pos(yn),
725+
Name: anchor,
726+
}, nil
727+
}
728+
729+
func (d *decoder) anchor(yn *yaml.Node) (ast.Expr, error) {
730+
var anchorIdent string
731+
732+
// Pick a non-conflicting anchor name.
733+
for i := 1; ; i++ {
734+
if i == 1 {
735+
anchorIdent = "#" + yn.Anchor
736+
} else {
737+
anchorIdent = "#" + yn.Anchor + "_" + strconv.Itoa(i)
738+
}
739+
if _, ok := d.anchorTakenNames[anchorIdent]; !ok {
740+
d.anchorTakenNames[anchorIdent] = struct{}{}
741+
break
742+
}
743+
}
744+
d.anchorNames[yn] = anchorIdent
745+
746+
// Process the node itself, but don't put it into the AST just yet;
747+
// store it for later, to be used as the value of the anchor's definition field.
748+
expr, err := d.extractNoAnchor(yn)
749+
if err != nil {
750+
return nil, err
751+
}
752+
d.anchorFields = append(d.anchorFields, ast.Field{
753+
Label: &ast.Ident{Name: anchorIdent},
754+
Value: expr,
755+
})
756+
757+
return &ast.Ident{
758+
NamePos: d.pos(yn),
759+
Name: anchorIdent,
760+
}, nil
761+
}
762+
657763
func labelStr(l ast.Label) string {
658764
switch l := l.(type) {
659765
case *ast.Ident:

internal/encoding/yaml/decode_test.go

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -474,26 +474,57 @@ Null: 1
474474
// Anchors and aliases.
475475
{
476476
"a: &x 1\nb: &y 2\nc: *x\nd: *y\n",
477-
`a: 1
478-
b: 2
479-
c: 1
480-
d: 2`,
477+
`#x: 1
478+
#y: 2
479+
a: #x
480+
b: #y
481+
c: #x
482+
d: #y`,
481483
}, {
482484
"a: &a {c: 1}\nb: *a",
483-
`a: {c: 1}
484-
b: {
485-
c: 1
486-
}`,
485+
`#a: {c: 1}
486+
a: #a
487+
b: #a`,
487488
}, {
488489
"a: &a [1, 2]\nb: *a",
489-
"a: [1, 2]\nb: [1, 2]", // TODO: a: [1, 2], b: a
490+
"#a: [1, 2]\na: #a\nb: #a",
490491
},
491492
{
492493
`a: &a "b"
493494
*a : "c"`,
494-
`a: "b"
495-
b: "c"`,
495+
`#a: "b"
496+
a: #a
497+
b: "c"`,
496498
},
499+
// Test nested anchors
500+
{
501+
`foo: &a
502+
bar: &b
503+
baz: 1
504+
a: *a
505+
b: *b
506+
`,
507+
`#b: {
508+
baz: 1
509+
}
510+
#a: {
511+
bar: #b
512+
}
513+
foo: #a
514+
a: #a
515+
b: #b`,
516+
},
517+
{
518+
`a:
519+
- &b c`,
520+
`#b: "c"
521+
a: [#b]`,
522+
},
523+
// Recursive anchor - make sure we don't infinitely recurse on such input.
524+
{"a: &a\n b: *a\n", `#a: {
525+
b: #a
526+
}
527+
a: #a`},
497528

498529
{
499530
"foo: ''",
@@ -778,10 +809,12 @@ a:
778809
// yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes
779810
{
780811
"First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n",
781-
`"First occurrence": "Foo"
782-
"Second occurrence": "Foo"
783-
"Override anchor": "Bar"
784-
"Reuse anchor": "Bar"`,
812+
`#anchor: "Foo"
813+
#anchor_2: "Bar"
814+
"First occurrence": #anchor
815+
"Second occurrence": #anchor
816+
"Override anchor": #anchor_2
817+
"Reuse anchor": #anchor_2`,
785818
},
786819
}
787820

@@ -926,7 +959,6 @@ var unmarshalErrorTests = []struct {
926959
{"v:\n- [A,", "test.yaml:2: did not find expected node content"},
927960
{"a:\n- b: *,", "test.yaml:2: did not find expected alphabetic or numeric character"},
928961
{"a: *b\n", "test.yaml: unknown anchor 'b' referenced"},
929-
{"a: &a\n b: *a\n", `test.yaml:2: anchor "a" value contains itself`},
930962
{"a: &a { b: c }\n*a : foo", "test.yaml:2: invalid map key: !!map"},
931963
{"a: &a [b]\n*a : foo", "test.yaml:2: invalid map key: !!seq"},
932964
{"value: -", "test.yaml: block sequence entries are not allowed in this context"},

internal/encoding/yaml/testdata/merge.out

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
#CENTER: {
2+
x: 1, y: 2
3+
}
4+
#LEFT: {
5+
x: 0, y: 2
6+
}
7+
#BIG: {
8+
r: 10
9+
}
10+
#SMALL: {
11+
r: 1
12+
}
13+
114
// From http://yaml.org/type/merge.html
215
// Test
316
anchors: {
4-
list: [{
5-
x: 1, y: 2
6-
}, {
7-
x: 0, y: 2
8-
}, {
9-
r: 10
10-
}, {
11-
r: 1
12-
}]
17+
list: [#CENTER, #LEFT, #BIG, #SMALL,
18+
]
1319
}
1420

1521
// All the following maps are equal:

0 commit comments

Comments
 (0)