Skip to content

Commit 33cc9e0

Browse files
committed
internal/encoding/yaml: encode YAML anchors as CUE definitions
This commit adds support for encoding YAML documents such as: a: &a 3 b: *a into this CUE document: #a: 3 a: #a b: #a Fixes #3818 Signed-off-by: Omri Steiner <[email protected]>
1 parent ee299f0 commit 33cc9e0

File tree

3 files changed

+190
-41
lines changed

3 files changed

+190
-41
lines changed

internal/encoding/yaml/decode.go

Lines changed: 121 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ type decoder struct {
6464

6565
// forceNewline ensures that the next position will be on a new line.
6666
forceNewline bool
67+
68+
// anchorFields contains the anchors that are gathered as we walk the YAML nodes.
69+
// These are only added to the AST once the whole document has been processed.
70+
anchorFields []ast.Field
71+
// anchorNames maps anchored nodes to the names generated for them.
72+
anchorNames map[*yaml.Node]string
73+
// anchorTakenNames keeps track of anchor names that have been taken.
74+
// It is used to ensure unique anchor names.
75+
anchorTakenNames map[string]struct{}
6776
}
6877

6978
// TODO(mvdan): this can be io.Reader really, except that token.Pos is offset-based,
@@ -83,9 +92,11 @@ func NewDecoder(filename string, b []byte) *decoder {
8392
tokFile := token.NewFile(filename, 0, len(b)+1)
8493
tokFile.SetLinesForContent(b)
8594
return &decoder{
86-
tokFile: tokFile,
87-
tokLines: append(tokFile.Lines(), len(b)),
88-
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
95+
tokFile: tokFile,
96+
tokLines: append(tokFile.Lines(), len(b)),
97+
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
98+
anchorNames: make(map[*yaml.Node]string),
99+
anchorTakenNames: make(map[string]struct{}),
89100
}
90101
}
91102

@@ -176,24 +187,35 @@ func Unmarshal(filename string, data []byte) (ast.Expr, error) {
176187
return n, nil
177188
}
178189

179-
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
180-
d.addHeadCommentsToPending(yn)
181-
var expr ast.Expr
182-
var err error
190+
// extractNoAnchor converts yn into a CUE expression by dispatching on the
// node's kind. It does not inspect any anchor set on yn itself. Alias nodes
// are turned into a reference via referenceAlias rather than being expanded.
// An unrecognised node kind yields a positioned error.
func (d *decoder) extractNoAnchor(yn *yaml.Node) (ast.Expr, error) {
183191
switch yn.Kind {
184192
case yaml.DocumentNode:
185-
expr, err = d.document(yn)
193+
return d.document(yn)
186194
case yaml.SequenceNode:
187-
expr, err = d.sequence(yn)
195+
return d.sequence(yn)
188196
case yaml.MappingNode:
189-
expr, err = d.mapping(yn)
197+
return d.mapping(yn)
190198
case yaml.ScalarNode:
191-
expr, err = d.scalar(yn)
199+
return d.scalar(yn)
192200
case yaml.AliasNode:
193-
expr, err = d.alias(yn)
201+
return d.referenceAlias(yn)
194202
default:
195203
return nil, d.posErrorf(yn, "unknown yaml node kind: %d", yn.Kind)
196204
}
205+
}
206+
207+
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
208+
d.addHeadCommentsToPending(yn)
209+
210+
var expr ast.Expr
211+
var err error
212+
213+
if yn.Anchor == "" {
214+
expr, err = d.extractNoAnchor(yn)
215+
} else {
216+
expr, err = d.anchor(yn)
217+
}
218+
197219
if err != nil {
198220
return nil, err
199221
}
@@ -324,7 +346,39 @@ func (d *decoder) document(yn *yaml.Node) (ast.Expr, error) {
324346
if n := len(yn.Content); n != 1 {
325347
return nil, d.posErrorf(yn, "yaml document nodes are meant to have one content node but have %d", n)
326348
}
327-
return d.extract(yn.Content[0])
349+
350+
expr, err := d.extract(yn.Content[0])
351+
if err != nil {
352+
return nil, err
353+
}
354+
355+
return d.addAnchorNodes(expr)
356+
}
357+
358+
// addAnchorNodes prepends anchor nodes at the top of the document.
359+
func (d *decoder) addAnchorNodes(expr ast.Expr) (ast.Expr, error) {
360+
elements := []ast.Decl{}
361+
362+
for _, field := range d.anchorFields {
363+
elements = append(elements, &field)
364+
}
365+
366+
switch x := expr.(type) {
367+
case *ast.StructLit:
368+
x.Elts = append(elements, x.Elts...)
369+
case *ast.ListLit:
370+
if len(elements) > 0 {
371+
expr = &ast.StructLit{
372+
Elts: append(elements, x),
373+
}
374+
}
375+
default:
376+
// If the whole YAML document is not a map / seq, then it can't have anchors.
377+
// maybe assert that `anchorFields` is empty?
378+
break
379+
}
380+
381+
return expr, nil
328382
}
329383

330384
func (d *decoder) sequence(yn *yaml.Node) (ast.Expr, error) {
@@ -458,7 +512,7 @@ func (d *decoder) label(yn *yaml.Node) (ast.Label, error) {
458512
if yn.Alias.Kind != yaml.ScalarNode {
459513
return nil, d.posErrorf(yn, "invalid map key: %v", yn.Alias.ShortTag())
460514
}
461-
expr, err = d.alias(yn)
515+
expr, err = d.inlineAlias(yn)
462516
value = yn.Alias.Value
463517
default:
464518
return nil, d.posErrorf(yn, "invalid map key: %v", yn.ShortTag())
@@ -639,7 +693,10 @@ func (d *decoder) makeNum(yn *yaml.Node, val string, kind token.Token) (expr ast
639693
return expr
640694
}
641695

642-
func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
696+
// inlineAlias expands an alias node in place, returning the expanded node.
697+
// Sometimes we have to resort to this, for example when the alias
698+
// is inside a map key, since CUE does not support structs as map keys.
699+
func (d *decoder) inlineAlias(yn *yaml.Node) (ast.Expr, error) {
643700
if d.extractingAliases[yn] {
644701
// TODO this could actually be allowed in some circumstances.
645702
return nil, d.posErrorf(yn, "anchor %q value contains itself", yn.Value)
@@ -649,11 +706,59 @@ func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
649706
}
650707
d.extractingAliases[yn] = true
651708
var node ast.Expr
652-
node, err := d.extract(yn.Alias)
709+
node, err := d.extractNoAnchor(yn.Alias)
653710
delete(d.extractingAliases, yn)
654711
return node, err
655712
}
656713

714+
// referenceAlias replaces an alias with a reference to the identifier of its anchor.
715+
func (d *decoder) referenceAlias(yn *yaml.Node) (ast.Expr, error) {
716+
anchor, ok := d.anchorNames[yn.Alias]
717+
if !ok {
718+
return nil, d.posErrorf(yn, "anchor %q not found", yn.Alias.Anchor)
719+
}
720+
721+
return &ast.Ident{
722+
NamePos: d.pos(yn),
723+
Name: anchor,
724+
}, nil
725+
}
726+
727+
func (d *decoder) anchor(yn *yaml.Node) (ast.Expr, error) {
728+
var anchorIdent string
729+
730+
// Pick a non-conflicting anchor name.
731+
for i := 1; ; i++ {
732+
if i == 1 {
733+
anchorIdent = "#" + yn.Anchor
734+
} else {
735+
anchorIdent = "#" + yn.Anchor + "_" + strconv.Itoa(i)
736+
}
737+
if _, ok := d.anchorTakenNames[anchorIdent]; !ok {
738+
d.anchorTakenNames[anchorIdent] = struct{}{}
739+
break
740+
}
741+
}
742+
d.anchorNames[yn] = anchorIdent
743+
744+
// Process the node itself, but don't put it into the AST just yet,
745+
// store it for later to be used as an anchor identifier.
746+
pos := d.pos(yn)
747+
expr, err := d.extractNoAnchor(yn)
748+
if err != nil {
749+
return nil, err
750+
}
751+
d.anchorFields = append(d.anchorFields, ast.Field{
752+
Label: &ast.Ident{Name: anchorIdent},
753+
Value: expr,
754+
})
755+
756+
return &ast.Ident{
757+
NamePos: pos,
758+
Name: anchorIdent,
759+
}, nil
760+
}
761+
657762
func labelStr(l ast.Label) string {
658763
switch l := l.(type) {
659764
case *ast.Ident:

internal/encoding/yaml/decode_test.go

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -474,26 +474,67 @@ Null: 1
474474
// Anchors and aliases.
475475
{
476476
"a: &x 1\nb: &y 2\nc: *x\nd: *y\n",
477-
`a: 1
478-
b: 2
479-
c: 1
480-
d: 2`,
477+
`#x: 1
478+
#y: 2
479+
a: #x
480+
b: #y
481+
c: #x
482+
d: #y`,
481483
}, {
482484
"a: &a {c: 1}\nb: *a",
483-
`a: {c: 1}
484-
b: {
485-
c: 1
486-
}`,
485+
`#a: {c: 1}
486+
a: #a
487+
b: #a`,
487488
}, {
488489
"a: &a [1, 2]\nb: *a",
489-
"a: [1, 2]\nb: [1, 2]", // TODO: a: [1, 2], b: a
490+
"#a: [1, 2]\na: #a\nb: #a",
490491
},
491492
{
492493
`a: &a "b"
493494
*a : "c"`,
494-
`a: "b"
495-
b: "c"`,
495+
`#a: "b"
496+
a: #a
497+
b: "c"`,
498+
},
499+
{
500+
`- 3
501+
- &a 4
502+
- *a`,
503+
`#a: 4, [
504+
3,
505+
#a,
506+
#a,
507+
]`,
508+
},
509+
// Test nested anchors
510+
{
511+
`foo: &a
512+
bar: &b
513+
baz: 1
514+
a: *a
515+
b: *b
516+
`,
517+
`#b: {
518+
baz: 1
519+
}
520+
#a: {
521+
bar: #b
522+
}
523+
foo: #a
524+
a: #a
525+
b: #b`,
496526
},
527+
{
528+
`a:
529+
- &b c`,
530+
`#b: "c"
531+
a: [#b]`,
532+
},
533+
// Recursive anchor - make sure we don't infinitely recurse on such input.
534+
{"a: &a\n b: *a\n", `#a: {
535+
b: #a
536+
}
537+
a: #a`},
497538

498539
{
499540
"foo: ''",
@@ -778,10 +819,12 @@ a:
778819
// yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes
779820
{
780821
"First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n",
781-
`"First occurrence": "Foo"
782-
"Second occurrence": "Foo"
783-
"Override anchor": "Bar"
784-
"Reuse anchor": "Bar"`,
822+
`#anchor: "Foo"
823+
#anchor_2: "Bar"
824+
"First occurrence": #anchor
825+
"Second occurrence": #anchor
826+
"Override anchor": #anchor_2
827+
"Reuse anchor": #anchor_2`,
785828
},
786829
}
787830

@@ -926,7 +969,6 @@ var unmarshalErrorTests = []struct {
926969
{"v:\n- [A,", "test.yaml:2: did not find expected node content"},
927970
{"a:\n- b: *,", "test.yaml:2: did not find expected alphabetic or numeric character"},
928971
{"a: *b\n", "test.yaml: unknown anchor 'b' referenced"},
929-
{"a: &a\n b: *a\n", `test.yaml:2: anchor "a" value contains itself`},
930972
{"a: &a { b: c }\n*a : foo", "test.yaml:2: invalid map key: !!map"},
931973
{"a: &a [b]\n*a : foo", "test.yaml:2: invalid map key: !!seq"},
932974
{"value: -", "test.yaml: block sequence entries are not allowed in this context"},

internal/encoding/yaml/testdata/merge.out

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
1+
#CENTER: {x: 1, y: 2}
2+
#LEFT: {x: 0, y: 2}
3+
#BIG: {r: 10}
4+
#SMALL: {r: 1}
5+
16
// From http://yaml.org/type/merge.html
27
// Test
38
anchors: {
4-
list: [{
5-
x: 1, y: 2
6-
}, {
7-
x: 0, y: 2
8-
}, {
9-
r: 10
10-
}, {
11-
r: 1
12-
}]
9+
list: [
10+
#CENTER,
11+
#LEFT,
12+
#BIG,
13+
#SMALL,
14+
]
1315
}
1416

1517
// All the following maps are equal:

0 commit comments

Comments
 (0)