| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 | // Copyright (c) 2014, David Kitchen <david@buro9.com>//// All rights reserved.//// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are met://// * Redistributions of source code must retain the above copyright notice, this//   list of conditions and the following disclaimer.//// * Redistributions in binary form must reproduce the above copyright notice,//   this list of conditions and the following disclaimer in the documentation//   and/or other materials provided with the distribution.//// * Neither the name of the organisation (Microcosm) nor the names of its//   contributors may be used to endorse or promote products derived from//   this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.package bluemondayimport (	"encoding/base64"	"net/url"	"regexp")// A selection of regular expressions that can be used as .Matching() rules on// HTML attributes.var (	// CellAlign handles the `align` attribute	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)	// CellVerticalAlign handles the `valign` attribute	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)	// Direction handles the `dir` attribute	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)	// ImageAlign handles the `align` attribute on the `image` tag	// http://www.w3.org/MarkUp/Test/Img/imgtest.html	ImageAlign = regexp.MustCompile(		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,	)	// Integer describes whole positive integers (including 0) used in places	// like td.colspan	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan	Integer = regexp.MustCompile(`^[0-9]+$`)	// ISO8601 according to the W3 group is only a subset of the ISO8601	// standard: http://www.w3.org/TR/NOTE-datetime	//	// Used in places like time.datetime	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime	//	// Matches patterns:	//  Year:	//     YYYY (eg 1997)	//  Year and month:	//     YYYY-MM (eg 1997-07)	//  Complete date:	//     YYYY-MM-DD (eg 1997-07-16)	//  Complete date plus hours and minutes:	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)	//  Complete date plus hours, minutes and seconds:	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)	//  Complete date plus hours, minutes, seconds and a decimal fraction of a	//  second	//      YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)	ISO8601 = regexp.MustCompile(		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,	)	// ListType encapsulates the common value as well as the latest spec	// values for lists	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute	// `class` which both contain space delimited lists of data tokens	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)	// Number is a double value used on HTML5 meter and progress elements	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)	// NumberOrPercent is used predominantly as units of measurement in width	// and height attributes	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)	// Paragraph of text in an attribute such as *.'title', img.alt, etc	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title	// Note that we are not allowing chars that could close tags like '>'	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable	// prefix of data URIs that contain common web image formats.	//	// This is not exported as it's not useful by itself, and only has value	// within the AllowDataURIImages func	dataURIImagePrefix = regexp.MustCompile(		`^image/(gif|jpeg|png|webp);base64,`,	))// AllowStandardURLs is a convenience function that will enable rel="nofollow"// on "a", "area" and "link" (if you have allowed those elements) and will// ensure that the URL values are parseable and either relative or belong to the// "mailto", "http", or "https" schemesfunc (p *Policy) AllowStandardURLs() {	// URLs must be parseable by net/url.Parse()	p.RequireParseableURLs(true)	// !url.IsAbs() is permitted	p.AllowRelativeURLs(true)	// Most common URL schemes only	p.AllowURLSchemes("mailto", "http", "https")	// For all anchors we will add rel="nofollow" if it does not already exist	// This applies to "a" "area" "link"	p.RequireNoFollowOnLinks(true)}// AllowStandardAttributes will enable "id", "title" and the language specific// attributes "dir" and "lang" on all elements that are whitelistedfunc (p *Policy) AllowStandardAttributes() {	// "dir" "lang" are permitted as both language attributes affect charsets	// and direction of text.	p.AllowAttrs("dir").Matching(Direction).Globally()	p.AllowAttrs(		"lang",	).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally()	// "id" is permitted. This is pretty much as some HTML elements require this	// to work well ("dfn" is an example of a "id" being value)	// This does create a risk that JavaScript and CSS within your web page	// might identify the wrong elements. Ensure that you select things	// accurately	p.AllowAttrs("id").Matching(		regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`),	).Globally()	// "title" is permitted as it improves accessibility.	p.AllowAttrs("title").Matching(Paragraph).Globally()}// AllowStyling presently enables the class attribute globally.//// Note: When bluemonday ships a CSS parser and we can safely sanitise that,// this will also allow sanitized styling of elements via the style attribute.func (p *Policy) AllowStyling() {	// "class" is permitted globally	p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally()}// AllowImages enables the img element and some popular attributes. It will also// ensure that URL values are parseable. This helper does not enable data URI// images, for that you should also use the AllowDataURIImages() helper.func (p *Policy) AllowImages() {	// "img" is permitted	p.AllowAttrs("align").Matching(ImageAlign).OnElements("img")	p.AllowAttrs("alt").Matching(Paragraph).OnElements("img")	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img")	// Standard URLs enabled	p.AllowStandardURLs()	p.AllowAttrs("src").OnElements("img")}// AllowDataURIImages permits the use of inline images defined in RFC2397// http://tools.ietf.org/html/rfc2397// http://en.wikipedia.org/wiki/Data_URI_scheme//// Images must have a mimetype matching://   image/gif//   image/jpeg//   image/png//   image/webp//// NOTE: There is a potential security risk to allowing data URIs and you should// only permit them on content you already trust.// http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/// https://capec.mitre.org/data/definitions/244.htmlfunc (p *Policy) AllowDataURIImages() {	// URLs must be parseable by net/url.Parse()	p.RequireParseableURLs(true)	// Supply a function to validate images contained within data URI	p.AllowURLSchemeWithCustomPolicy(		"data",		func(url *url.URL) (allowUrl bool) {			if url.RawQuery != "" || url.Fragment != "" {				return false			}			matched := dataURIImagePrefix.FindString(url.Opaque)			if matched == "" {				return false			}			_, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):])			if err != nil {				return false			}			return true		},	)}// AllowLists will enabled ordered and unordered lists, as well as definition// listsfunc (p *Policy) AllowLists() {	// "ol" "ul" are permitted	p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")	// "li" is permitted	p.AllowAttrs("type").Matching(ListType).OnElements("li")	p.AllowAttrs("value").Matching(Integer).OnElements("li")	// "dl" "dt" "dd" are permitted	p.AllowElements("dl", "dt", "dd")}// AllowTables will enable a rich set of elements and attributes to describe// HTML tablesfunc (p *Policy) AllowTables() {	// "table" is permitted	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")	p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")	// "caption" is permitted	p.AllowElements("caption")	// "col" "colgroup" are permitted	p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")	p.AllowAttrs("height", "width").Matching(		NumberOrPercent,	).OnElements("col", "colgroup")	p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")	p.AllowAttrs("valign").Matching(		CellVerticalAlign,	).OnElements("col", "colgroup")	// "thead" "tr" are permitted	p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")	// "td" "th" are permitted	p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")	p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")	p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")	p.AllowAttrs("headers").Matching(		SpaceSeparatedTokens,	).OnElements("td", "th")	p.AllowAttrs("height", "width").Matching(		NumberOrPercent,	).OnElements("td", "th")	p.AllowAttrs(		"scope",	).Matching(		regexp.MustCompile(`(?i)(?:row|col)(?:group)?`),	).OnElements("td", "th")	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")	p.AllowAttrs("nowrap").Matching(		regexp.MustCompile(`(?i)|nowrap`),	).OnElements("td", "th")	// "tbody" "tfoot"	p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")	p.AllowAttrs("valign").Matching(		CellVerticalAlign,	).OnElements("tbody", "tfoot")}
 |