Better handling of line comments in nested sequences

Cases like this are now handled properly:

    # Header for the whole list
    - # Header for some data
      - Some data

Fixes #533.

commit: 0b070bb63a1840d358e283258faa3b4022ed0396 [log] [tgz]
author: Gustavo Niemeyer <gustavo@niemeyer.net> Thu Nov 07 17:52:35 2019 +0000
committer: Gustavo Niemeyer <gustavo@niemeyer.net> Thu Nov 07 17:52:35 2019 +0000
tree: 713df3b5eb6c98b93fa355de0bbacd3bc6d581c3
parent: e228e37189d388c1bff077892b97197221e505cb [diff]
diff --git a/node_test.go b/node_test.go
index e0b1cae..433722d 100644
--- a/node_test.go
+++ b/node_test.go

@@ -948,6 +948,112 @@
 			}},
 		},
 	}, {
+		"# DH1\n\n# HL1\n- - la\n  # HB1\n  - lb\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   4,
+			Column: 1,
+			HeadComment: "# DH1",
+			Content: []*yaml.Node{{
+				Kind:   yaml.SequenceNode,
+				Tag:    "!!seq",
+				Line:   4,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.SequenceNode,
+					Tag:         "!!seq",
+					Line:        4,
+					Column:      3,
+					HeadComment: "# HL1",
+					Content: []*yaml.Node{{
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Line:   4,
+						Column: 5,
+						Value:  "la",
+					}, {
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Line:        6,
+						Column:      5,
+						Value:       "lb",
+						HeadComment: "# HB1",
+					}},
+				}},
+			}},
+		},
+	}, {
+		"# DH1\n\n# HL1\n- # HA1\n  - la\n  # HB1\n  - lb\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   4,
+			Column: 1,
+			HeadComment: "# DH1",
+			Content: []*yaml.Node{{
+				Kind:   yaml.SequenceNode,
+				Tag:    "!!seq",
+				Line:   4,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.SequenceNode,
+					Tag:         "!!seq",
+					Line:        5,
+					Column:      3,
+					HeadComment: "# HL1",
+					Content: []*yaml.Node{{
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Line:   5,
+						Column: 5,
+						Value:  "la",
+						HeadComment: "# HA1",
+					}, {
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Line:        7,
+						Column:      5,
+						Value:       "lb",
+						HeadComment: "# HB1",
+					}},
+				}},
+			}},
+		},
+	}, {
+		"[decode]# DH1\n\n# HL1\n- # HA1\n\n  - la\n  # HB1\n  - lb\n",
+		yaml.Node{
+			Kind:   yaml.DocumentNode,
+			Line:   4,
+			Column: 1,
+			HeadComment: "# DH1",
+			Content: []*yaml.Node{{
+				Kind:   yaml.SequenceNode,
+				Tag:    "!!seq",
+				Line:   4,
+				Column: 1,
+				Content: []*yaml.Node{{
+					Kind:        yaml.SequenceNode,
+					Tag:         "!!seq",
+					Line:        6,
+					Column:      3,
+					HeadComment: "# HL1\n# HA1",
+					Content: []*yaml.Node{{
+						Kind:   yaml.ScalarNode,
+						Tag:    "!!str",
+						Line:   6,
+						Column: 5,
+						Value:  "la",
+					}, {
+						Kind:        yaml.ScalarNode,
+						Tag:         "!!str",
+						Line:        8,
+						Column:      5,
+						Value:       "lb",
+						HeadComment: "# HB1",
+					}},
+				}},
+			}},
+		},
+	}, {
 		"# DH1\n\n# HA1\nka:\n  # HB1\n  kb:\n  # HC1\n  # HC2\n  - lc # IC\n  # FC1\n  # FC2\n\n  # HD1\n  - ld # ID\n  # FD1\n\n# DF1\n",
 		yaml.Node{
 			Kind:        yaml.DocumentNode,
@@ -2090,7 +2196,7 @@
 		if strings.Contains(item.yaml, "#") {
 			var buf bytes.Buffer
 			fprintComments(&buf, &item.node, "    ")
-			c.Logf("  comments:\n%s", buf.Bytes())
+			c.Logf("  expected comments:\n%s", buf.Bytes())
 		}
 
 		decode := true
@@ -2110,6 +2216,11 @@
 			var node yaml.Node
 			err := yaml.Unmarshal([]byte(testYaml), &node)
 			c.Assert(err, IsNil)
+			if strings.Contains(item.yaml, "#") {
+				var buf bytes.Buffer
+				fprintComments(&buf, &node, "    ")
+				c.Logf("  obtained comments:\n%s", buf.Bytes())
+			}
 			c.Assert(node, DeepEquals, item.node)
 		}
 		if encode {

diff --git a/parserc.go b/parserc.go
index ec25faa..aea9050 100644
--- a/parserc.go
+++ b/parserc.go

@@ -423,6 +423,7 @@
 	parser.line_comment = nil
 	parser.foot_comment = nil
 	parser.tail_comment = nil
+	parser.stem_comment = nil
 }
 
 // Parse the productions:
@@ -629,6 +630,10 @@
 			implicit:   implicit,
 			style:      yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE),
 		}
+		if parser.stem_comment != nil {
+			event.head_comment = parser.stem_comment
+			parser.stem_comment = nil
+		}
 		return true
 	}
 	if block && token.typ == yaml_BLOCK_MAPPING_START_TOKEN {
@@ -689,11 +694,25 @@
 
 	if token.typ == yaml_BLOCK_ENTRY_TOKEN {
 		mark := token.end_mark
+		prior_head := len(parser.head_comment)
 		skip_token(parser)
 		token = peek_token(parser)
 		if token == nil {
 			return false
 		}
+		if prior_head > 0 && token.typ == yaml_BLOCK_SEQUENCE_START_TOKEN {
+			// [Go] It's a sequence under a sequence entry, so the former head comment
+			//      is for the list itself, not the first list item under it.
+			parser.stem_comment = parser.head_comment[:prior_head]
+			if len(parser.head_comment) == prior_head {
+				parser.head_comment = nil
+			} else {
+				// Copy suffix to prevent very strange bugs if someone ever appends
+				// further bytes to the prefix in the stem_comment slice above.
+				parser.head_comment = append([]byte(nil), parser.head_comment[prior_head+1:]...)
+			}
+
+		}
 		if token.typ != yaml_BLOCK_ENTRY_TOKEN && token.typ != yaml_BLOCK_END_TOKEN {
 			parser.states = append(parser.states, yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE)
 			return yaml_parser_parse_node(parser, event, true, false)

diff --git a/scannerc.go b/scannerc.go
index 2e50813..7c78d23 100644
--- a/scannerc.go
+++ b/scannerc.go

@@ -660,11 +660,11 @@
 		// Check if we really need to fetch more tokens.
 		need_more_tokens := false
 
-		// [Go] The comment parsing logic requires a lookahead of one token
-		// in block style or two tokens in flow style so that the foot
-		// comments may be parsed in time of associating them with the tokens
-		// that are parsed before them.
-		if parser.tokens_head >= len(parser.tokens)-1 || parser.flow_level > 0 && parser.tokens_head >= len(parser.tokens)-2 {
+		// [Go] The comment parsing logic requires a lookahead of two tokens
+		// so that foot comments may be parsed in time of associating them
+		// with the tokens that are parsed before them, and also for line
+		// comments to be transformed into head comments in some edge cases.
+		if parser.tokens_head >= len(parser.tokens)-2 {
 			need_more_tokens = true
 		} else {
 			// Check if any potential simple key may occupy the head position.
@@ -1558,6 +1558,28 @@
 			}
 		}
 
+		// Check if we just had a line comment under a sequence entry that
+		// looks more like a header to the following content. Similar to this:
+		//
+		// - # The comment
+		//   - Some data
+		//
+		// If so, transform the line comment to a head comment and reposition.
+		if len(parser.comments) > 0 && len(parser.tokens) > 0 {
+			token := parser.tokens[len(parser.tokens)-1]
+			comment := &parser.comments[len(parser.comments)-1]
+			if token.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {
+				// If it was in the prior line, reposition so it becomes a
+				// header of the follow up token. Otherwise, keep it in place
+				// so it becomes a header of the former.
+				comment.head = comment.line
+				comment.line = nil
+				if comment.start_mark.line == parser.mark.line-1 {
+					comment.token_mark = parser.mark
+				}
+			}
+		}
+
 		// Eat a comment until a line break.
 		if parser.buffer[parser.buffer_pos] == '#' {
 			if !yaml_parser_scan_comments(parser, scan_mark) {
@@ -2233,8 +2255,15 @@
 		}
 	}
 	if parser.buffer[parser.buffer_pos] == '#' {
-		if !yaml_parser_scan_line_comment(parser, start_mark) {
-			return false
+		// TODO Test this and then re-enable it.
+		//if !yaml_parser_scan_line_comment(parser, start_mark) {
+		//	return false
+		//}
+		for !is_breakz(parser.buffer, parser.buffer_pos) {
+			skip(parser)
+			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
+				return false
+			}
 		}
 	}
 
@@ -2803,8 +2832,8 @@
 		return true
 	}
 
-	parser.comments = append(parser.comments, yaml_comment_t{token_mark: token_mark})
-	comment := &parser.comments[len(parser.comments)-1].line
+	var start_mark yaml_mark_t
+	var text []byte
 
 	for peek := 0; peek < 512; peek++ {
 		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
@@ -2814,11 +2843,6 @@
 			continue
 		}
 		if parser.buffer[parser.buffer_pos+peek] == '#' {
-			if len(*comment) > 0 {
-				*comment = append(*comment, '\n')
-			}
-
-			// Consume until after the consumed comment line.
 			seen := parser.mark.index+peek
 			for {
 				if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
@@ -2834,7 +2858,10 @@
 					skip_line(parser)
 				} else {
 					if parser.mark.index >= seen {
-						*comment = append(*comment, parser.buffer[parser.buffer_pos])
+						if len(text) == 0 {
+							start_mark = parser.mark
+						}
+						text = append(text, parser.buffer[parser.buffer_pos])
 					}
 					skip(parser)
 				}
@@ -2842,6 +2869,13 @@
 		}
 		break
 	}
+	if len(text) > 0 {
+		parser.comments = append(parser.comments, yaml_comment_t{
+			token_mark: token_mark,
+			start_mark: start_mark,
+			line: text,
+		})
+	}
 	return true
 }
 

diff --git a/yamlh.go b/yamlh.go
index 65fb0df..d5ea07c 100644
--- a/yamlh.go
+++ b/yamlh.go

@@ -600,6 +600,7 @@
 	line_comment []byte // The current line comments
 	foot_comment []byte // The current foot comments
 	tail_comment []byte // Foot comment that happens at the end of a block.
+	stem_comment []byte // Comment in item preceding a nested structure (list inside list item, etc)
 
 	comments      []yaml_comment_t // The folded comments for all parsed tokens
 	comments_head int
commit	0b070bb63a1840d358e283258faa3b4022ed0396	[log] [tgz]
author	Gustavo Niemeyer <gustavo@niemeyer.net>	Thu Nov 07 17:52:35 2019 +0000
committer	Gustavo Niemeyer <gustavo@niemeyer.net>	Thu Nov 07 17:52:35 2019 +0000
tree	713df3b5eb6c98b93fa355de0bbacd3bc6d581c3
parent	e228e37189d388c1bff077892b97197221e505cb [diff]