| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648 | // Copyright 2012 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// +build ignorepackage main// This program generates table.go and table_test.go.// Invoke as////	go run gen.go |gofmt >table.go//	go run gen.go -test |gofmt >table_test.goimport (	"flag"	"fmt"	"math/rand"	"os"	"sort"	"strings")// identifier converts s to a Go exported identifier.// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".func identifier(s string) string {	b := make([]byte, 0, len(s))	cap := true	for _, c := range s {		if c == '-' {			cap = true			continue		}		if cap && 'a' <= c && c <= 'z' {			c -= 'a' - 'A'		}		cap = false		b = append(b, byte(c))	}	return string(b)}var test = flag.Bool("test", false, "generate table_test.go")func main() {	flag.Parse()	var all []string	all = append(all, elements...)	all = append(all, attributes...)	all = append(all, eventHandlers...)	all = append(all, extra...)	sort.Strings(all)	if *test {		fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")		fmt.Printf("package atom\n\n")		fmt.Printf("var testAtomList = []string{\n")		for _, s := range all {			fmt.Printf("\t%q,\n", s)		}		fmt.Printf("}\n")		return	}	// uniq - lists have dups	// compute max len too	maxLen := 0	w := 0	for _, s := range all {		if w == 0 || all[w-1] != s {			if maxLen < len(s) {				maxLen = len(s)			}			all[w] = s			w++		}	}	all = all[:w]	// Find hash that minimizes table size.	var best *table	for i := 0; i < 1000000; i++ {		if best != nil && 1<<(best.k-1) < len(all) {			break		}		h := rand.Uint32()		for k := uint(0); k <= 16; k++ {			if best != nil && k >= best.k {				break			}			var t table			if t.init(h, k, all) {				best = &t				break			}		}	}	if best == nil {		fmt.Fprintf(os.Stderr, "failed to construct string table\n")		os.Exit(1)	}	// Lay out strings, using overlaps when possible.	layout := append([]string{}, all...)	// Remove strings that are substrings of other strings	for changed := true; changed; {		changed = false		for i, s := range layout {			if s == "" {				continue			}			for j, t := range layout {				if i != j && t != "" && strings.Contains(s, t) {					changed = true					layout[j] = ""				}			}		}	}	// Join strings where one suffix matches another prefix.	for {		// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],		// maximizing overlap length k.		besti := -1		bestj := -1		bestk := 0		for i, s := range layout {			if s == "" {				continue			}			for j, t := range layout {				if i == j {					continue				}				for k := bestk + 1; k <= len(s) && k <= len(t); k++ {					if s[len(s)-k:] == t[:k] {						besti = i						bestj = j						bestk = k					}				}			}		}		if bestk > 0 {			layout[besti] += layout[bestj][bestk:]			layout[bestj] = ""			continue		}		break	}	text := strings.Join(layout, "")	atom := map[string]uint32{}	for _, s := range all {		off := strings.Index(text, s)		if off < 0 {			panic("lost string " + s)		}		atom[s] = uint32(off<<8 | len(s))	}	// Generate the Go code.	fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")	fmt.Printf("package atom\n\nconst (\n")	for _, s := range all {		fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])	}	fmt.Printf(")\n\n")	fmt.Printf("const hash0 = %#x\n\n", best.h0)	fmt.Printf("const maxAtomLen = %d\n\n", maxLen)	fmt.Printf("var table = [1<<%d]Atom{\n", best.k)	for i, s := range best.tab {		if s == "" {			continue		}		fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)	}	fmt.Printf("}\n")	datasize := (1 << best.k) * 4	fmt.Printf("const atomText =\n")	textsize := len(text)	for len(text) > 60 {		fmt.Printf("\t%q +\n", text[:60])		text = text[60:]	}	fmt.Printf("\t%q\n\n", text)	fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)}type byLen []stringfunc (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }func (x byLen) Len() int           { return len(x) }// fnv computes the FNV hash with an arbitrary starting value h.func fnv(h uint32, s string) uint32 {	for i := 0; i < len(s); i++ {		h ^= uint32(s[i])		h *= 16777619	}	return h}// A table represents an attempt at constructing the lookup table.// The lookup table uses cuckoo hashing, meaning that each string// can be found in one of two positions.type table struct {	h0   uint32	k    uint	mask uint32	tab  []string}// hash returns the two hashes for s.func (t *table) hash(s string) (h1, h2 uint32) {	h := fnv(t.h0, s)	h1 = h & t.mask	h2 = (h >> 16) & t.mask	return}// init initializes the table with the given parameters.// h0 is the initial hash value,// k is the number of bits of hash value to use, and// x is the list of strings to store in the table.// init returns false if the table cannot be constructed.func (t *table) init(h0 uint32, k uint, x []string) bool {	t.h0 = h0	t.k = k	t.tab = make([]string, 1<<k)	t.mask = 1<<k - 1	for _, s := range x {		if !t.insert(s) {			return false		}	}	return true}// insert inserts s in the table.func (t *table) insert(s string) bool {	h1, h2 := t.hash(s)	if t.tab[h1] == "" {		t.tab[h1] = s		return true	}	if t.tab[h2] == "" {		t.tab[h2] = s		return true	}	if t.push(h1, 0) {		t.tab[h1] = s		return true	}	if t.push(h2, 0) {		t.tab[h2] = s		return true	}	return false}// push attempts to push aside the entry in slot i.func (t *table) push(i uint32, depth int) bool {	if depth > len(t.tab) {		return false	}	s := t.tab[i]	h1, h2 := t.hash(s)	j := h1 + h2 - i	if t.tab[j] != "" && !t.push(j, depth+1) {		return false	}	t.tab[j] = s	return true}// The lists of element names and attribute keys were taken from// https://html.spec.whatwg.org/multipage/indices.html#index// as of the "HTML Living Standard - Last Updated 21 February 2015" version.var elements = []string{	"a",	"abbr",	"address",	"area",	"article",	"aside",	"audio",	"b",	"base",	"bdi",	"bdo",	"blockquote",	"body",	"br",	"button",	"canvas",	"caption",	"cite",	"code",	"col",	"colgroup",	"command",	"data",	"datalist",	"dd",	"del",	"details",	"dfn",	"dialog",	"div",	"dl",	"dt",	"em",	"embed",	"fieldset",	"figcaption",	"figure",	"footer",	"form",	"h1",	"h2",	"h3",	"h4",	"h5",	"h6",	"head",	"header",	"hgroup",	"hr",	"html",	"i",	"iframe",	"img",	"input",	"ins",	"kbd",	"keygen",	"label",	"legend",	"li",	"link",	"map",	"mark",	"menu",	"menuitem",	"meta",	"meter",	"nav",	"noscript",	"object",	"ol",	"optgroup",	"option",	"output",	"p",	"param",	"pre",	"progress",	"q",	"rp",	"rt",	"ruby",	"s",	"samp",	"script",	"section",	"select",	"small",	"source",	"span",	"strong",	"style",	"sub",	"summary",	"sup",	"table",	"tbody",	"td",	"template",	"textarea",	"tfoot",	"th",	"thead",	"time",	"title",	"tr",	"track",	"u",	"ul",	"var",	"video",	"wbr",}// https://html.spec.whatwg.org/multipage/indices.html#attributes-3var attributes = []string{	"abbr",	"accept",	"accept-charset",	"accesskey",	"action",	"alt",	"async",	"autocomplete",	"autofocus",	"autoplay",	"challenge",	"charset",	"checked",	"cite",	"class",	"cols",	"colspan",	"command",	"content",	"contenteditable",	"contextmenu",	"controls",	"coords",	"crossorigin",	"data",	"datetime",	"default",	"defer",	"dir",	"dirname",	"disabled",	"download",	"draggable",	"dropzone",	"enctype",	"for",	"form",	"formaction",	"formenctype",	"formmethod",	"formnovalidate",	"formtarget",	"headers",	"height",	"hidden",	"high",	"href",	"hreflang",	"http-equiv",	"icon",	"id",	"inputmode",	"ismap",	"itemid",	"itemprop",	"itemref",	"itemscope",	"itemtype",	"keytype",	"kind",	"label",	"lang",	"list",	"loop",	"low",	"manifest",	"max",	"maxlength",	"media",	"mediagroup",	"method",	"min",	"minlength",	"multiple",	"muted",	"name",	"novalidate",	"open",	"optimum",	"pattern",	"ping",	"placeholder",	"poster",	"preload",	"radiogroup",	"readonly",	"rel",	"required",	"reversed",	"rows",	"rowspan",	"sandbox",	"spellcheck",	"scope",	"scoped",	"seamless",	"selected",	"shape",	"size",	"sizes",	"sortable",	"sorted",	"span",	"src",	"srcdoc",	"srclang",	"start",	"step",	"style",	"tabindex",	"target",	"title",	"translate",	"type",	"typemustmatch",	"usemap",	"value",	"width",	"wrap",}var eventHandlers = []string{	"onabort",	"onautocomplete",	"onautocompleteerror",	"onafterprint",	"onbeforeprint",	"onbeforeunload",	"onblur",	"oncancel",	"oncanplay",	"oncanplaythrough",	"onchange",	"onclick",	"onclose",	"oncontextmenu",	"oncuechange",	"ondblclick",	"ondrag",	"ondragend",	"ondragenter",	"ondragleave",	"ondragover",	"ondragstart",	"ondrop",	"ondurationchange",	"onemptied",	"onended",	"onerror",	"onfocus",	"onhashchange",	"oninput",	"oninvalid",	"onkeydown",	"onkeypress",	"onkeyup",	"onlanguagechange",	"onload",	"onloadeddata",	"onloadedmetadata",	"onloadstart",	"onmessage",	"onmousedown",	"onmousemove",	"onmouseout",	"onmouseover",	"onmouseup",	"onmousewheel",	"onoffline",	"ononline",	"onpagehide",	"onpageshow",	"onpause",	"onplay",	"onplaying",	"onpopstate",	"onprogress",	"onratechange",	"onreset",	"onresize",	"onscroll",	"onseeked",	"onseeking",	"onselect",	"onshow",	"onsort",	"onstalled",	"onstorage",	"onsubmit",	"onsuspend",	"ontimeupdate",	"ontoggle",	"onunload",	"onvolumechange",	"onwaiting",}// extra are ad-hoc values not covered by any of the lists above.var extra = []string{	"align",	"annotation",	"annotation-xml",	"applet",	"basefont",	"bgsound",	"big",	"blink",	"center",	"color",	"desc",	"face",	"font",	"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.	"foreignobject",	"frame",	"frameset",	"image",	"isindex",	"listing",	"malignmark",	"marquee",	"math",	"mglyph",	"mi",	"mn",	"mo",	"ms",	"mtext",	"nobr",	"noembed",	"noframes",	"plaintext",	"prompt",	"public",	"spacer",	"strike",	"svg",	"system",	"tt",	"xmp",}
 |