src/pkg/html/parse_test.go | 20 ++++++++++++++------
src/pkg/html/render.go | 31 +++++++++++++++++++++----------
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
index caf3c92bc75bfd5308877030e446f5dc5143b255..067eb26d04f46ad2a6b5be0acef6751e7df30ba8 100644
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -160,14 +160,10 @@ if want := string(b); got != want {
t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", filename, i, text, got, want)
continue
}
- // Check that rendering and re-parsing results in an identical tree.
- if filename == "tests1.dat" && (i == 30 || i == 77) {
- // Some tests in tests1.dat have such messed-up markup that a correct parse
- // results in a non-conforming tree (one element nested inside another).
- // Therefore when it is rendered and re-parsed, it isn't the same.
- // So we skip rendering on that test.
+ if renderTestBlacklist[text] {
continue
}
+ // Check that rendering and re-parsing results in an identical tree.
pr, pw := io.Pipe()
go func() {
pw.CloseWithError(Render(pw, doc))
@@ -187,3 +183,15 @@ }
}
}
}
+
+// Some test inputs result in parse trees that are not 'well-formed' despite
+// following the HTML5 recovery algorithms. Rendering and re-parsing such a
+// tree will not result in an exact clone of that tree. We blacklist such
+// inputs from the render test.
+var renderTestBlacklist = map[string]bool{
+ // The second <a> will be reparented to the first <table>'s parent. This
+ // results in an <a> whose parent is an <a>, which is not 'well-formed'.
+ `<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
+ // The second <a> will be reparented, similar to the case above.
+ `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
+}
diff --git a/src/pkg/html/render.go b/src/pkg/html/render.go
index d5dc44843330c760f006d57c943bf4bfe00eec83..0522b6ef92ae193c1452b7a975998b833aff5f3b 100644
--- a/src/pkg/html/render.go
+++ b/src/pkg/html/render.go
@@ -19,17 +19,28 @@ }
// Render renders the parse tree n to the given writer.
//
-// For 'well-formed' parse trees, calling Parse on the output of Render will
-// result in a clone of the original tree.
+// Rendering is done on a 'best effort' basis: calling Parse on the output of
+// Render will always result in something similar to the original tree, but it
+// is not necessarily an exact clone unless the original tree was 'well-formed'.
+// 'Well-formed' is not easily specified; the HTML5 specification is
+// complicated.
//
-// 'Well-formed' is not formally specified, but calling Parse on arbitrary
-// input results in a 'well-formed' parse tree if Parse does not return an
-// error. Programmatically constructed trees are typically also 'well-formed',
-// but it is possible to construct a tree that, when rendered and re-parsed,
-// results in a different tree. A simple example is that a solitary text node
-// would become a tree containing <html>, <head> and <body> elements. Another
-// example is that the programmatic equivalent of "a<head>b</head>c" becomes
-// "<html><head><head/><body>abc</body></html>".
+// Calling Parse on arbitrary input typically results in a 'well-formed' parse
+// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
+// For example, in a 'well-formed' parse tree, no <a> element is a child of
+// another <a> element: parsing "<a><a>" results in two sibling elements.
+// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
+//