| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396 | // Copyright 2013 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package languageimport (	"bytes"	"fmt"	"sort"	"strconv"	"golang.org/x/text/internal/tag")// findIndex tries to find the given tag in idx and returns a standardized error// if it could not be found.func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {	if !tag.FixCase(form, key) {		return 0, errSyntax	}	i := idx.Index(key)	if i == -1 {		return 0, mkErrInvalid(key)	}	return i, nil}func searchUint(imap []uint16, key uint16) int {	return sort.Search(len(imap), func(i int) bool {		return imap[i] >= key	})}type langID uint16// getLangID returns the langID of s if s is a canonical subtag// or langUnknown if s is not a canonical subtag.func getLangID(s []byte) (langID, error) {	if len(s) == 2 {		return getLangISO2(s)	}	return getLangISO3(s)}// mapLang returns the mapped langID of id according to mapping m.func normLang(id langID) (langID, langAliasType) {	k := sort.Search(len(langAliasMap), func(i int) bool {		return langAliasMap[i].from >= uint16(id)	})	if k < len(langAliasMap) && langAliasMap[k].from == uint16(id) {		return langID(langAliasMap[k].to), langAliasTypes[k]	}	return id, langAliasTypeUnknown}// getLangISO2 returns the langID for the given 2-letter ISO language code// or unknownLang if this does not exist.func getLangISO2(s []byte) (langID, error) {	if !tag.FixCase("zz", s) {		return 0, errSyntax	}	if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {		return langID(i), nil	}	return 0, mkErrInvalid(s)}const base = 'z' - 'a' + 1func strToInt(s []byte) uint {	v := uint(0)	for i := 0; i < len(s); i++ {		v *= base		v += uint(s[i] - 'a')	}	return v}// converts the given integer to the original ASCII string passed to strToInt.// len(s) must match the number of characters obtained.func intToStr(v uint, s []byte) {	for i := len(s) - 1; i >= 0; i-- {		s[i] = byte(v%base) + 'a'		v /= base	}}// getLangISO3 returns the langID for the given 3-letter ISO language code// or unknownLang if this does not exist.func getLangISO3(s []byte) (langID, error) {	if tag.FixCase("und", s) {		// first try to match canonical 3-letter entries		for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {			if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {				// We treat "und" as special and always translate it to "unspecified".				// Note that ZZ and Zzzz are private use and are not treated as				// unspecified by default.				id := langID(i)				if id == nonCanonicalUnd {					return 0, nil				}				return id, nil			}		}		if i := altLangISO3.Index(s); i != -1 {			return langID(altLangIndex[altLangISO3.Elem(i)[3]]), nil		}		n := strToInt(s)		if langNoIndex[n/8]&(1<<(n%8)) != 0 {			return langID(n) + langNoIndexOffset, nil		}		// Check for non-canonical uses of ISO3.		for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {			if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {				return langID(i), nil			}		}		return 0, mkErrInvalid(s)	}	return 0, errSyntax}// stringToBuf writes the string to b and returns the number of bytes// written.  cap(b) must be >= 3.func (id langID) stringToBuf(b []byte) int {	if id >= langNoIndexOffset {		intToStr(uint(id)-langNoIndexOffset, b[:3])		return 3	} else if id == 0 {		return copy(b, "und")	}	l := lang[id<<2:]	if l[3] == 0 {		return copy(b, l[:3])	}	return copy(b, l[:2])}// String returns the BCP 47 representation of the langID.// Use b as variable name, instead of id, to ensure the variable// used is consistent with that of Base in which this type is embedded.func (b langID) String() string {	if b == 0 {		return "und"	} else if b >= langNoIndexOffset {		b -= langNoIndexOffset		buf := [3]byte{}		intToStr(uint(b), buf[:])		return string(buf[:])	}	l := lang.Elem(int(b))	if l[3] == 0 {		return l[:3]	}	return l[:2]}// ISO3 returns the ISO 639-3 language code.func (b langID) ISO3() string {	if b == 0 || b >= langNoIndexOffset {		return b.String()	}	l := lang.Elem(int(b))	if l[3] == 0 {		return l[:3]	} else if l[2] == 0 {		return altLangISO3.Elem(int(l[3]))[:3]	}	// This allocation will only happen for 3-letter ISO codes	// that are non-canonical BCP 47 language identifiers.	return l[0:1] + l[2:4]}// IsPrivateUse reports whether this language code is reserved for private use.func (b langID) IsPrivateUse() bool {	return langPrivateStart <= b && b <= langPrivateEnd}type regionID uint16// getRegionID returns the region id for s if s is a valid 2-letter region code// or unknownRegion.func getRegionID(s []byte) (regionID, error) {	if len(s) == 3 {		if isAlpha(s[0]) {			return getRegionISO3(s)		}		if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {			return getRegionM49(int(i))		}	}	return getRegionISO2(s)}// getRegionISO2 returns the regionID for the given 2-letter ISO country code// or unknownRegion if this does not exist.func getRegionISO2(s []byte) (regionID, error) {	i, err := findIndex(regionISO, s, "ZZ")	if err != nil {		return 0, err	}	return regionID(i) + isoRegionOffset, nil}// getRegionISO3 returns the regionID for the given 3-letter ISO country code// or unknownRegion if this does not exist.func getRegionISO3(s []byte) (regionID, error) {	if tag.FixCase("ZZZ", s) {		for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {			if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {				return regionID(i) + isoRegionOffset, nil			}		}		for i := 0; i < len(altRegionISO3); i += 3 {			if tag.Compare(altRegionISO3[i:i+3], s) == 0 {				return regionID(altRegionIDs[i/3]), nil			}		}		return 0, mkErrInvalid(s)	}	return 0, errSyntax}func getRegionM49(n int) (regionID, error) {	if 0 < n && n <= 999 {		const (			searchBits = 7			regionBits = 9			regionMask = 1<<regionBits - 1		)		idx := n >> searchBits		buf := fromM49[m49Index[idx]:m49Index[idx+1]]		val := uint16(n) << regionBits // we rely on bits shifting out		i := sort.Search(len(buf), func(i int) bool {			return buf[i] >= val		})		if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {			return regionID(r & regionMask), nil		}	}	var e ValueError	fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)	return 0, e}// normRegion returns a region if r is deprecated or 0 otherwise.// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).// TODO: consider mapping split up regions to new most populous one (like CLDR).func normRegion(r regionID) regionID {	m := regionOldMap	k := sort.Search(len(m), func(i int) bool {		return m[i].from >= uint16(r)	})	if k < len(m) && m[k].from == uint16(r) {		return regionID(m[k].to)	}	return 0}const (	iso3166UserAssigned = 1 << iota	ccTLD	bcp47Region)func (r regionID) typ() byte {	return regionTypes[r]}// String returns the BCP 47 representation for the region.// It returns "ZZ" for an unspecified region.func (r regionID) String() string {	if r < isoRegionOffset {		if r == 0 {			return "ZZ"		}		return fmt.Sprintf("%03d", r.M49())	}	r -= isoRegionOffset	return regionISO.Elem(int(r))[:2]}// ISO3 returns the 3-letter ISO code of r.// Note that not all regions have a 3-letter ISO code.// In such cases this method returns "ZZZ".func (r regionID) ISO3() string {	if r < isoRegionOffset {		return "ZZZ"	}	r -= isoRegionOffset	reg := regionISO.Elem(int(r))	switch reg[2] {	case 0:		return altRegionISO3[reg[3]:][:3]	case ' ':		return "ZZZ"	}	return reg[0:1] + reg[2:4]}// M49 returns the UN M.49 encoding of r, or 0 if this encoding// is not defined for r.func (r regionID) M49() int {	return int(m49[r])}// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This// may include private-use tags that are assigned by CLDR and used in this// implementation. So IsPrivateUse and IsCountry can be simultaneously true.func (r regionID) IsPrivateUse() bool {	return r.typ()&iso3166UserAssigned != 0}type scriptID uint8// getScriptID returns the script id for string s. It assumes that s// is of the format [A-Z][a-z]{3}.func getScriptID(idx tag.Index, s []byte) (scriptID, error) {	i, err := findIndex(idx, s, "Zzzz")	return scriptID(i), err}// String returns the script code in title case.// It returns "Zzzz" for an unspecified script.func (s scriptID) String() string {	if s == 0 {		return "Zzzz"	}	return script.Elem(int(s))}// IsPrivateUse reports whether this script code is reserved for private use.func (s scriptID) IsPrivateUse() bool {	return _Qaaa <= s && s <= _Qabx}const (	maxAltTaglen = len("en-US-POSIX")	maxLen       = maxAltTaglen)var (	// grandfatheredMap holds a mapping from legacy and grandfathered tags to	// their base language or index to more elaborate tag.	grandfatheredMap = map[[maxLen]byte]int16{		[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban		[maxLen]byte{'i', '-', 'a', 'm', 'i'}:                          _ami, // i-ami		[maxLen]byte{'i', '-', 'b', 'n', 'n'}:                          _bnn, // i-bnn		[maxLen]byte{'i', '-', 'h', 'a', 'k'}:                          _hak, // i-hak		[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}:      _tlh, // i-klingon		[maxLen]byte{'i', '-', 'l', 'u', 'x'}:                          _lb,  // i-lux		[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}:           _nv,  // i-navajo		[maxLen]byte{'i', '-', 'p', 'w', 'n'}:                          _pwn, // i-pwn		[maxLen]byte{'i', '-', 't', 'a', 'o'}:                          _tao, // i-tao		[maxLen]byte{'i', '-', 't', 'a', 'y'}:                          _tay, // i-tay		[maxLen]byte{'i', '-', 't', 's', 'u'}:                          _tsu, // i-tsu		[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}:                     _nb,  // no-bok		[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}:                     _nn,  // no-nyn		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}:      _sfb, // sgn-BE-FR		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}:      _vgt, // sgn-BE-NL		[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}:      _sgg, // sgn-CH-DE		[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}:           _cmn, // zh-guoyu		[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}:           _hak, // zh-hakka		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan		[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}:           _hsn, // zh-xiang		// Grandfathered tags with no modern replacement will be converted as		// follows:		[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish		[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}:           -2, // en-GB-oed		[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}:           -3, // i-default		[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}:      -4, // i-enochian		[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}:                     -5, // i-mingo		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}:                          -6, // zh-min		// CLDR-specific tag.		[maxLen]byte{'r', 'o', 'o', 't'}:                                    0,  // root		[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX"	}	altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}	altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix")func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {	if v, ok := grandfatheredMap[s]; ok {		if v < 0 {			return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true		}		t.lang = langID(v)		return t, true	}	return t, false}
 |