| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 | 
							- package mahonia
 
- // Generic converters for multibyte character sets.
 
// mbcsTrie is one node of the byte-indexed trie that converts text in a
// multibyte character set to Unicode. Every byte of an encoded character
// selects one child, so a character encoded as "\x01\x02\x03" has its code
// point stored at t.children[1].children[2].children[3].char.
type mbcsTrie struct {
	// char is the Unicode code point of the character whose encoding ends
	// at this node. It stays zero on nodes where no character ends.
	char rune

	// children is either nil or holds exactly 256 sub-tries, indexed by the
	// next byte of the encoded character.
	children []mbcsTrie
}
 
- // A MBCSTable holds the data to convert to and from Unicode.
 
- type MBCSTable struct {
 
- 	toUnicode   mbcsTrie
 
- 	fromUnicode map[rune]string
 
- }
 
- // AddCharacter adds a character to the table. rune is its Unicode code point,
 
- // and bytes contains the bytes used to encode it in the character set.
 
- func (table *MBCSTable) AddCharacter(c rune, bytes string) {
 
- 	if table.fromUnicode == nil {
 
- 		table.fromUnicode = make(map[rune]string)
 
- 	}
 
- 	table.fromUnicode[c] = bytes
 
- 	trie := &table.toUnicode
 
- 	for i := 0; i < len(bytes); i++ {
 
- 		if trie.children == nil {
 
- 			trie.children = make([]mbcsTrie, 256)
 
- 		}
 
- 		b := bytes[i]
 
- 		trie = &trie.children[b]
 
- 	}
 
- 	trie.char = c
 
- }
 
- func (table *MBCSTable) Decoder() Decoder {
 
- 	return func(p []byte) (c rune, size int, status Status) {
 
- 		if len(p) == 0 {
 
- 			status = NO_ROOM
 
- 			return
 
- 		}
 
- 		if p[0] == 0 {
 
- 			return 0, 1, SUCCESS
 
- 		}
 
- 		trie := &table.toUnicode
 
- 		for trie.char == 0 {
 
- 			if trie.children == nil {
 
- 				return 0xfffd, 1, INVALID_CHAR
 
- 			}
 
- 			if len(p) < size+1 {
 
- 				return 0, 0, NO_ROOM
 
- 			}
 
- 			trie = &trie.children[p[size]]
 
- 			size++
 
- 		}
 
- 		c = trie.char
 
- 		status = SUCCESS
 
- 		return
 
- 	}
 
- }
 
- func (table *MBCSTable) Encoder() Encoder {
 
- 	return func(p []byte, c rune) (size int, status Status) {
 
- 		bytes := table.fromUnicode[c]
 
- 		if bytes == "" {
 
- 			if len(p) > 0 {
 
- 				p[0] = '?'
 
- 				return 1, INVALID_CHAR
 
- 			} else {
 
- 				return 0, NO_ROOM
 
- 			}
 
- 		}
 
- 		if len(p) < len(bytes) {
 
- 			return 0, NO_ROOM
 
- 		}
 
- 		return copy(p, bytes), SUCCESS
 
- 	}
 
- }
 
 
  |