src/pkg/exp/template/html/Makefile | 1 + src/pkg/exp/template/html/context.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++------ src/pkg/exp/template/html/escape.go | 285 ++++++++++++++++++++++++++++++++++++++++++++++++----- src/pkg/exp/template/html/escape_test.go | 280 +++++++++++++++++++++++++++++++++++++++++++++++------ src/pkg/exp/template/html/js.go | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/pkg/exp/template/html/js_test.go | 352 +++++++++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/src/pkg/exp/template/html/Makefile b/src/pkg/exp/template/html/Makefile index 6d8ff5cd14a539fb8dbb852accdb2a1471076b65..3a93bebc091e37728df87cc33db02767d4ceb5d7 100644 --- a/src/pkg/exp/template/html/Makefile +++ b/src/pkg/exp/template/html/Makefile @@ -8,5 +8,6 @@ TARG=exp/template/html GOFILES=\ context.go\ escape.go\ + js.go\ include ../../../../Make.pkg diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go index d8fed158677402bf2e7044c931df954456c331de..428b3d0b3af785261a5991fed7bd6cfa9c0f5c6f 100644 --- a/src/pkg/exp/template/html/context.go +++ b/src/pkg/exp/template/html/context.go @@ -19,13 +19,14 @@ type context struct { state state delim delim urlPart urlPart + jsCtx jsCtx errLine int errStr string } // eq returns whether two contexts are equal. func (c context) eq(d context) bool { - return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.errLine == d.errLine && c.errStr == d.errStr + return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.jsCtx == d.jsCtx && c.errLine == d.errLine && c.errStr == d.errStr } // state describes a high-level HTML parser state. @@ -50,17 +51,35 @@ // stateAttr occurs inside an HTML attribute whose content is text. stateAttr // stateURL occurs inside an HTML attribute whose content is a URL. stateURL + // stateJS occurs inside an event handler or script element. 
+ stateJS + // stateJSDqStr occurs inside a JavaScript double quoted string. + stateJSDqStr + // stateJSSqStr occurs inside a JavaScript single quoted string. + stateJSSqStr + // stateJSRegexp occurs inside a JavaScript regexp literal. + stateJSRegexp + // stateJSBlockCmt occurs inside a JavaScript /* block comment */. + stateJSBlockCmt + // stateJSLineCmt occurs inside a JavaScript // line comment. + stateJSLineCmt // stateError is an infectious error state outside any valid // HTML/CSS/JS construct. stateError ) var stateNames = [...]string{ - stateText: "stateText", - stateTag: "stateTag", - stateAttr: "stateAttr", - stateURL: "stateURL", - stateError: "stateError", + stateText: "stateText", + stateTag: "stateTag", + stateAttr: "stateAttr", + stateURL: "stateURL", + stateJS: "stateJS", + stateJSDqStr: "stateJSDqStr", + stateJSSqStr: "stateJSSqStr", + stateJSRegexp: "stateJSRegexp", + stateJSBlockCmt: "stateJSBlockCmt", + stateJSLineCmt: "stateJSLineCmt", + stateError: "stateError", } func (s state) String() string { @@ -131,3 +150,24 @@ return urlPartNames[u] } return fmt.Sprintf("illegal urlPart %d", u) } + +// jsCtx determines whether a '/' starts a regular expression literal or a +// division operator. +type jsCtx uint8 + +const ( + // jsCtxRegexp occurs where a '/' would start a regexp literal. + jsCtxRegexp jsCtx = iota + // jsCtxDivOp occurs where a '/' would start a division operator. + jsCtxDivOp +) + +func (c jsCtx) String() string { + switch c { + case jsCtxRegexp: + return "jsCtxRegexp" + case jsCtxDivOp: + return "jsCtxDivOp" + } + return fmt.Sprintf("illegal jsCtx %d", c) +} diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go index e7de81c4c6860910c7a210b06826bdfe1a94e7f9..0eb8dfec8d523fea282cdfa66c708bdec459c4d6 100644 --- a/src/pkg/exp/template/html/escape.go +++ b/src/pkg/exp/template/html/escape.go @@ -33,7 +33,10 @@ } // funcMap maps command names to functions that render their inputs safe. 
var funcMap = template.FuncMap{ - "exp_template_html_urlfilter": urlFilter, + "exp_template_html_urlfilter": urlFilter, + "exp_template_html_jsvalescaper": jsValEscaper, + "exp_template_html_jsstrescaper": jsStrEscaper, + "exp_template_html_jsregexpescaper": jsRegexpEscaper, } // escape escapes a template node. @@ -58,15 +61,16 @@ } // escapeAction escapes an action template node. func escapeAction(c context, n *parse.ActionNode) context { - sanitizer := "html" - if c.state == stateURL { + s := make([]string, 0, 2) + switch c.state { + case stateURL: switch c.urlPart { case urlPartNone: - sanitizer = "exp_template_html_urlfilter" + s = append(s, "exp_template_html_urlfilter") case urlPartQueryOrFrag: - sanitizer = "urlquery" + s = append(s, "urlquery") case urlPartPreQuery: - // The default "html" works here. + s = append(s, "html") case urlPartUnknown: return context{ state: stateError, @@ -76,21 +80,94 @@ } default: panic(c.urlPart.String()) } + case stateJS: + s = append(s, "exp_template_html_jsvalescaper") + if c.delim != delimNone { + s = append(s, "html") + } + case stateJSDqStr, stateJSSqStr: + s = append(s, "exp_template_html_jsstrescaper") + case stateJSRegexp: + s = append(s, "exp_template_html_jsregexpescaper") + case stateJSBlockCmt, stateJSLineCmt: + return context{ + state: stateError, + errLine: n.Line, + errStr: fmt.Sprintf("%s appears inside a comment", n), + } + default: + s = append(s, "html") } - // If the pipe already ends with the sanitizer, do not interfere. - if m := len(n.Pipe.Cmds); m != 0 { - if last := n.Pipe.Cmds[m-1]; len(last.Args) != 0 { - if i, ok := last.Args[0].(*parse.IdentifierNode); ok && i.Ident == sanitizer { - return c + ensurePipelineContains(n.Pipe, s) + return c +} + +// ensurePipelineContains ensures that the pipeline has commands with +// the identifiers in s in order. +// If the pipeline already has some of the sanitizers, do not interfere. 
// For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it
// has one matching, "html", and one to insert, "escapeJSVal", to produce
// (.X | escapeJSVal | html).
//
// Only the unbroken run of commands at the end of p whose first argument is
// an identifier is inspected for existing sanitizers; every command before
// that run is copied through unchanged. p.Cmds is replaced with a new slice
// when anything has to be inserted; the caller's s is never modified.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
	if len(s) == 0 {
		return
	}
	n := len(p.Cmds)
	// Find the identifiers at the end of the command chain.
	idents := p.Cmds
	for i := n - 1; i >= 0; i-- {
		if cmd := p.Cmds[i]; len(cmd.Args) != 0 {
			if _, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
				continue
			}
		}
		// Stop at the first non-identifier command seen while scanning
		// backwards. Without the break an earlier non-identifier command
		// would widen idents again, and the type assertions below would
		// panic on a command that does not start with an identifier.
		idents = p.Cmds[i+1:]
		break
	}
	// Count how many of the required sanitizers already appear, in order,
	// among the trailing identifiers. If all do, there is nothing to insert.
	dups := 0
	for _, id := range idents {
		if s[dups] == (id.Args[0].(*parse.IdentifierNode)).Ident {
			dups++
			if dups == len(s) {
				return
			}
		}
	}
	// Start from the commands that precede the trailing identifiers.
	newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups)
	copy(newCmds, p.Cmds)
	// Merge existing identifier commands with the sanitizers needed.
	for _, id := range idents {
		i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s)
		if i != -1 {
			// Emit the sanitizers that must run before this existing one,
			// then drop them (and the matched one) from the to-do list.
			for _, name := range s[:i] {
				newCmds = append(newCmds, newIdentCmd(name))
			}
			s = s[i+1:]
		}
		newCmds = append(newCmds, id)
	}
	// Create any remaining sanitizers.
	for _, name := range s {
		newCmds = append(newCmds, newIdentCmd(name))
	}
	p.Cmds = newCmds
}

// indexOfStr is the least i such that strs[i] == s or -1 if s is not in strs.
func indexOfStr(s string, strs []string) int {
	for i, t := range strs {
		if s == t {
			return i
		}
	}
	return -1
}

// newIdentCmd produces a command containing a single identifier node.
func newIdentCmd(identifier string) *parse.CommandNode {
	return &parse.CommandNode{
		NodeType: parse.NodeCommand,
		Args:     []parse.Node{parse.NewIdentifier(identifier)},
	}
}

// join joins the two contexts of a branch template node.
The result is an @@ -203,11 +280,17 @@ // transitionFunc is the array of context transition functions for text nodes. // A transition function takes a context and template text input, and returns // the updated context and any unconsumed text. var transitionFunc = [...]func(context, []byte) (context, []byte){ - stateText: tText, - stateTag: tTag, - stateURL: tURL, - stateAttr: tAttr, - stateError: tError, + stateText: tText, + stateTag: tTag, + stateURL: tURL, + stateJS: tJS, + stateJSDqStr: tJSStr, + stateJSSqStr: tJSStr, + stateJSRegexp: tJSRegexp, + stateJSBlockCmt: tJSBlockCmt, + stateJSLineCmt: tJSLineCmt, + stateAttr: tAttr, + stateError: tError, } // tText is the context transition function for the text state. @@ -249,8 +332,11 @@ if i == len(s) { return context{state: stateTag}, nil } state := stateAttr - if urlAttr[strings.ToLower(string(s[attrStart:i]))] { + canonAttrName := strings.ToLower(string(s[attrStart:i])) + if urlAttr[canonAttrName] { state = stateURL + } else if strings.HasPrefix(canonAttrName, "on") { + state = stateJS } // Look for the start of the value. @@ -268,16 +354,17 @@ // Consume the "=". i = eatWhiteSpace(s, i+1) // Find the attribute delimiter. + delim := delimSpaceOrTagEnd if i < len(s) { switch s[i] { case '\'': - return context{state: state, delim: delimSingleQuote}, s[i+1:] + delim, i = delimSingleQuote, i+1 case '"': - return context{state: state, delim: delimDoubleQuote}, s[i+1:] + delim, i = delimDoubleQuote, i+1 } } - return context{state: state, delim: delimSpaceOrTagEnd}, s[i:] + return context{state: state, delim: delim}, s[i:] } // tAttr is the context transition function for the attribute state. @@ -293,6 +380,154 @@ } else if c.urlPart == urlPartNone { c.urlPart = urlPartPreQuery } return c, nil +} + +// tJS is the context transition function for the JS state. +func tJS(c context, s []byte) (context, []byte) { + // TODO: delegate to tSpecialTagEnd to find any once that CL + // has been merged. 
+ + i := bytes.IndexAny(s, `"'/`) + if i == -1 { + // Entire input is non string, comment, regexp tokens. + c.jsCtx = nextJSCtx(s, c.jsCtx) + return c, nil + } + c.jsCtx = nextJSCtx(s[:i], c.jsCtx) + switch s[i] { + case '"': + c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp + case '\'': + c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp + case '/': + switch { + case i+1 < len(s) && s[i+1] == '/': + c.state = stateJSLineCmt + case i+1 < len(s) && s[i+1] == '*': + c.state = stateJSBlockCmt + case c.jsCtx == jsCtxRegexp: + c.state = stateJSRegexp + default: + c.jsCtx = jsCtxRegexp + } + default: + panic("unreachable") + } + return c, s[i+1:] +} + +// tJSStr is the context transition function for the JS string states. +func tJSStr(c context, s []byte) (context, []byte) { + // TODO: delegate to tSpecialTagEnd to find any once that CL + // has been merged. + + quoteAndEsc := `\"` + if c.state == stateJSSqStr { + quoteAndEsc = `\'` + } + + b := s + for { + i := bytes.IndexAny(b, quoteAndEsc) + if i == -1 { + return c, nil + } + if b[i] == '\\' { + i++ + if i == len(b) { + return context{ + state: stateError, + errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s), + }, nil + } + } else { + c.state, c.jsCtx = stateJS, jsCtxDivOp + return c, b[i+1:] + } + b = b[i+1:] + } + panic("unreachable") +} + +// tJSRegexp is the context transition function for the /RegExp/ literal state. +func tJSRegexp(c context, s []byte) (context, []byte) { + // TODO: delegate to tSpecialTagEnd to find any once that CL + // has been merged. 
+ + b := s + inCharset := false + for { + i := bytes.IndexAny(b, `/[\]`) + if i == -1 { + break + } + switch b[i] { + case '/': + if !inCharset { + c.state, c.jsCtx = stateJS, jsCtxDivOp + return c, b[i+1:] + } + case '\\': + i++ + if i == len(b) { + return context{ + state: stateError, + errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s), + }, nil + } + case '[': + inCharset = true + case ']': + inCharset = false + default: + panic("unreachable") + } + b = b[i+1:] + } + + if inCharset { + // This can be fixed by making context richer if interpolation + // into charsets is desired. + return context{ + state: stateError, + errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s), + }, nil + } + + return c, nil +} + +var blockCommentEnd = []byte("*/") + +// tJSBlockCmt is the context transition function for the JS /*comment*/ state. +func tJSBlockCmt(c context, s []byte) (context, []byte) { + // TODO: delegate to tSpecialTagEnd to find any once that CL + // has been merged. + + i := bytes.Index(s, blockCommentEnd) + if i == -1 { + return c, nil + } + c.state = stateJS + return c, s[i+2:] +} + +// tJSLineCmt is the context transition function for the JS //comment state. +func tJSLineCmt(c context, s []byte) (context, []byte) { + // TODO: delegate to tSpecialTagEnd to find any once that CL + // has been merged. + + i := bytes.IndexAny(s, "\r\n\u2028\u2029") + if i == -1 { + return c, nil + } + c.state = stateJS + // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 + // "However, the LineTerminator at the end of the line is not + // considered to be part of the single-line comment; it is recognised + // separately by the lexical grammar and becomes part of the stream of + // input elements for the syntactic grammar." + return c, s[i:] } // tError is the context transition function for the error state. 
diff --git a/src/pkg/exp/template/html/escape_test.go b/src/pkg/exp/template/html/escape_test.go index a911c7d8357b3791e0f6f1611f6bd9c9a2153f0f..6f5ecf6ef3e84959189bd7f0d55a7eaa05a87b78 100644 --- a/src/pkg/exp/template/html/escape_test.go +++ b/src/pkg/exp/template/html/escape_test.go @@ -8,6 +8,7 @@ import ( "bytes" "strings" "template" + "template/parse" "testing" ) @@ -16,6 +17,8 @@ var data = struct { F, T bool C, G, H string A, E []string + N int + Z *int }{ F: false, T: true, @@ -24,9 +27,11 @@ G: "", H: "", A: []string{"", ""}, E: []string{}, + N: 42, + Z: nil, } - var testCases = []struct { + tests := []struct { name string input string output string @@ -141,29 +146,71 @@ "urlBranchConflictMoot", ``, ``, }, + { + "jsStrValue", + "