Better handling of line comments in nested sequences
Cases like this, where a comment follows the dash of an entry that
holds a nested sequence, are now handled properly:

    # Header for the whole list
    - # Header for some data
      - Some data

Fixes #533.
diff --git a/node_test.go b/node_test.go
index e0b1cae..433722d 100644
--- a/node_test.go
+++ b/node_test.go
@@ -948,6 +948,112 @@
}},
},
}, {
+ "# DH1\n\n# HL1\n- - la\n # HB1\n - lb\n",
+ yaml.Node{
+ Kind: yaml.DocumentNode,
+ Line: 4,
+ Column: 1,
+ HeadComment: "# DH1",
+ Content: []*yaml.Node{{
+ Kind: yaml.SequenceNode,
+ Tag: "!!seq",
+ Line: 4,
+ Column: 1,
+ Content: []*yaml.Node{{
+ Kind: yaml.SequenceNode,
+ Tag: "!!seq",
+ Line: 4,
+ Column: 3,
+ HeadComment: "# HL1",
+ Content: []*yaml.Node{{
+ Kind: yaml.ScalarNode,
+ Tag: "!!str",
+ Line: 4,
+ Column: 5,
+ Value: "la",
+ }, {
+ Kind: yaml.ScalarNode,
+ Tag: "!!str",
+ Line: 6,
+ Column: 5,
+ Value: "lb",
+ HeadComment: "# HB1",
+ }},
+ }},
+ }},
+ },
+ }, {
+ "# DH1\n\n# HL1\n- # HA1\n - la\n # HB1\n - lb\n",
+ yaml.Node{
+ Kind: yaml.DocumentNode,
+ Line: 4,
+ Column: 1,
+ HeadComment: "# DH1",
+ Content: []*yaml.Node{{
+ Kind: yaml.SequenceNode,
+ Tag: "!!seq",
+ Line: 4,
+ Column: 1,
+ Content: []*yaml.Node{{
+ Kind: yaml.SequenceNode,
+ Tag: "!!seq",
+ Line: 5,
+ Column: 3,
+ HeadComment: "# HL1",
+ Content: []*yaml.Node{{
+ Kind: yaml.ScalarNode,
+ Tag: "!!str",
+ Line: 5,
+ Column: 5,
+ Value: "la",
+ HeadComment: "# HA1",
+ }, {
+ Kind: yaml.ScalarNode,
+ Tag: "!!str",
+ Line: 7,
+ Column: 5,
+ Value: "lb",
+ HeadComment: "# HB1",
+ }},
+ }},
+ }},
+ },
+ }, {
+ "[decode]# DH1\n\n# HL1\n- # HA1\n\n - la\n # HB1\n - lb\n",
+ yaml.Node{
+ Kind: yaml.DocumentNode,
+ Line: 4,
+ Column: 1,
+ HeadComment: "# DH1",
+ Content: []*yaml.Node{{
+ Kind: yaml.SequenceNode,
+ Tag: "!!seq",
+ Line: 4,
+ Column: 1,
+ Content: []*yaml.Node{{
+ Kind: yaml.SequenceNode,
+ Tag: "!!seq",
+ Line: 6,
+ Column: 3,
+ HeadComment: "# HL1\n# HA1",
+ Content: []*yaml.Node{{
+ Kind: yaml.ScalarNode,
+ Tag: "!!str",
+ Line: 6,
+ Column: 5,
+ Value: "la",
+ }, {
+ Kind: yaml.ScalarNode,
+ Tag: "!!str",
+ Line: 8,
+ Column: 5,
+ Value: "lb",
+ HeadComment: "# HB1",
+ }},
+ }},
+ }},
+ },
+ }, {
"# DH1\n\n# HA1\nka:\n # HB1\n kb:\n # HC1\n # HC2\n - lc # IC\n # FC1\n # FC2\n\n # HD1\n - ld # ID\n # FD1\n\n# DF1\n",
yaml.Node{
Kind: yaml.DocumentNode,
@@ -2090,7 +2196,7 @@
if strings.Contains(item.yaml, "#") {
var buf bytes.Buffer
fprintComments(&buf, &item.node, " ")
- c.Logf(" comments:\n%s", buf.Bytes())
+ c.Logf(" expected comments:\n%s", buf.Bytes())
}
decode := true
@@ -2110,6 +2216,11 @@
var node yaml.Node
err := yaml.Unmarshal([]byte(testYaml), &node)
c.Assert(err, IsNil)
+ if strings.Contains(item.yaml, "#") {
+ var buf bytes.Buffer
+ fprintComments(&buf, &node, " ")
+ c.Logf(" obtained comments:\n%s", buf.Bytes())
+ }
c.Assert(node, DeepEquals, item.node)
}
if encode {
diff --git a/parserc.go b/parserc.go
index ec25faa..aea9050 100644
--- a/parserc.go
+++ b/parserc.go
@@ -423,6 +423,7 @@
parser.line_comment = nil
parser.foot_comment = nil
parser.tail_comment = nil
+ parser.stem_comment = nil
}
// Parse the productions:
@@ -629,6 +630,10 @@
implicit: implicit,
style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE),
}
+ if parser.stem_comment != nil {
+ event.head_comment = parser.stem_comment
+ parser.stem_comment = nil
+ }
return true
}
if block && token.typ == yaml_BLOCK_MAPPING_START_TOKEN {
@@ -689,11 +694,25 @@
if token.typ == yaml_BLOCK_ENTRY_TOKEN {
mark := token.end_mark
+ prior_head := len(parser.head_comment)
skip_token(parser)
token = peek_token(parser)
if token == nil {
return false
}
+ if prior_head > 0 && token.typ == yaml_BLOCK_SEQUENCE_START_TOKEN {
+ // [Go] It's a sequence under a sequence entry, so the former head comment
+ // is for the list itself, not the first list item under it.
+ parser.stem_comment = parser.head_comment[:prior_head]
+ if len(parser.head_comment) == prior_head {
+ parser.head_comment = nil
+ } else {
+ // Copy suffix to prevent very strange bugs if someone ever appends
+ // further bytes to the prefix in the stem_comment slice above.
+ parser.head_comment = append([]byte(nil), parser.head_comment[prior_head+1:]...)
+ }
+
+ }
if token.typ != yaml_BLOCK_ENTRY_TOKEN && token.typ != yaml_BLOCK_END_TOKEN {
parser.states = append(parser.states, yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE)
return yaml_parser_parse_node(parser, event, true, false)
diff --git a/scannerc.go b/scannerc.go
index 2e50813..7c78d23 100644
--- a/scannerc.go
+++ b/scannerc.go
@@ -660,11 +660,11 @@
// Check if we really need to fetch more tokens.
need_more_tokens := false
- // [Go] The comment parsing logic requires a lookahead of one token
- // in block style or two tokens in flow style so that the foot
- // comments may be parsed in time of associating them with the tokens
- // that are parsed before them.
- if parser.tokens_head >= len(parser.tokens)-1 || parser.flow_level > 0 && parser.tokens_head >= len(parser.tokens)-2 {
+ // [Go] The comment parsing logic requires a lookahead of two tokens
+ // so that foot comments may be parsed in time of associating them
+ // with the tokens that are parsed before them, and also for line
+ // comments to be transformed into head comments in some edge cases.
+ if parser.tokens_head >= len(parser.tokens)-2 {
need_more_tokens = true
} else {
// Check if any potential simple key may occupy the head position.
@@ -1558,6 +1558,28 @@
}
}
+ // Check if we just had a line comment under a sequence entry that
+ // looks more like a header to the following content. Similar to this:
+ //
+ // - # The comment
+ // - Some data
+ //
+ // If so, transform the line comment to a head comment and reposition.
+ if len(parser.comments) > 0 && len(parser.tokens) > 0 {
+ token := parser.tokens[len(parser.tokens)-1]
+ comment := &parser.comments[len(parser.comments)-1]
+ if token.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {
+ // If it was in the prior line, reposition so it becomes a
+ // header of the follow up token. Otherwise, keep it in place
+ // so it becomes a header of the former.
+ comment.head = comment.line
+ comment.line = nil
+ if comment.start_mark.line == parser.mark.line-1 {
+ comment.token_mark = parser.mark
+ }
+ }
+ }
+
// Eat a comment until a line break.
if parser.buffer[parser.buffer_pos] == '#' {
if !yaml_parser_scan_comments(parser, scan_mark) {
@@ -2233,8 +2255,15 @@
}
}
if parser.buffer[parser.buffer_pos] == '#' {
- if !yaml_parser_scan_line_comment(parser, start_mark) {
- return false
+ // TODO Test this and then re-enable it.
+ //if !yaml_parser_scan_line_comment(parser, start_mark) {
+ // return false
+ //}
+ for !is_breakz(parser.buffer, parser.buffer_pos) {
+ skip(parser)
+ if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
+ return false
+ }
}
}
@@ -2803,8 +2832,8 @@
return true
}
- parser.comments = append(parser.comments, yaml_comment_t{token_mark: token_mark})
- comment := &parser.comments[len(parser.comments)-1].line
+ var start_mark yaml_mark_t
+ var text []byte
for peek := 0; peek < 512; peek++ {
if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
@@ -2814,11 +2843,6 @@
continue
}
if parser.buffer[parser.buffer_pos+peek] == '#' {
- if len(*comment) > 0 {
- *comment = append(*comment, '\n')
- }
-
- // Consume until after the consumed comment line.
seen := parser.mark.index+peek
for {
if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
@@ -2834,7 +2858,10 @@
skip_line(parser)
} else {
if parser.mark.index >= seen {
- *comment = append(*comment, parser.buffer[parser.buffer_pos])
+ if len(text) == 0 {
+ start_mark = parser.mark
+ }
+ text = append(text, parser.buffer[parser.buffer_pos])
}
skip(parser)
}
@@ -2842,6 +2869,13 @@
}
break
}
+ if len(text) > 0 {
+ parser.comments = append(parser.comments, yaml_comment_t{
+ token_mark: token_mark,
+ start_mark: start_mark,
+ line: text,
+ })
+ }
return true
}
diff --git a/yamlh.go b/yamlh.go
index 65fb0df..d5ea07c 100644
--- a/yamlh.go
+++ b/yamlh.go
@@ -600,6 +600,7 @@
line_comment []byte // The current line comments
foot_comment []byte // The current foot comments
tail_comment []byte // Foot comment that happens at the end of a block.
+ stem_comment []byte // Comment in item preceding a nested structure (list inside list item, etc)
comments []yaml_comment_t // The folded comments for all parsed tokens
comments_head int