| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 | package mahonia// Generic converters for multibyte character sets.// An mbcsTrie contains the data to convert from the character set to Unicode.// If a character would be encoded as "\x01\x02\x03", its unicode value would be found at t.children[1].children[2].children[3].rune// children either is nil or has 256 elements.type mbcsTrie struct {	// For leaf nodes, the Unicode character that is represented.	char rune	// For non-leaf nodes, the trie to decode the remainder of the character.	children []mbcsTrie}// A MBCSTable holds the data to convert to and from Unicode.type MBCSTable struct {	toUnicode   mbcsTrie	fromUnicode map[rune]string}// AddCharacter adds a character to the table. rune is its Unicode code point,// and bytes contains the bytes used to encode it in the character set.func (table *MBCSTable) AddCharacter(c rune, bytes string) {	if table.fromUnicode == nil {		table.fromUnicode = make(map[rune]string)	}	table.fromUnicode[c] = bytes	trie := &table.toUnicode	for i := 0; i < len(bytes); i++ {		if trie.children == nil {			trie.children = make([]mbcsTrie, 256)		}		b := bytes[i]		trie = &trie.children[b]	}	trie.char = c}func (table *MBCSTable) Decoder() Decoder {	return func(p []byte) (c rune, size int, status Status) {		if len(p) == 0 {			status = NO_ROOM			return		}		if p[0] == 0 {			return 0, 1, SUCCESS		}		trie := &table.toUnicode		for trie.char == 0 {			if trie.children == nil {				return 0xfffd, 1, INVALID_CHAR			}			if len(p) < size+1 {				return 0, 0, NO_ROOM			}			trie = &trie.children[p[size]]			size++		}		c = trie.char		status = SUCCESS		return	}}func (table *MBCSTable) Encoder() Encoder {	return func(p []byte, c rune) (size int, status Status) {		bytes := table.fromUnicode[c]		if bytes == "" {			if len(p) > 0 {				p[0] = '?'				return 1, INVALID_CHAR			} else {				return 0, NO_ROOM			}		}		if len(p) < len(bytes) {			return 0, NO_ROOM		}		return copy(p, bytes), SUCCESS	}}
 |