| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 | package chardettype recognizer interface {	Match(*recognizerInput) recognizerOutput}type recognizerOutput Resulttype recognizerInput struct {	raw         []byte	input       []byte	tagStripped bool	byteStats   []int	hasC1Bytes  bool}func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {	input, stripped := mayStripInput(raw, stripTag)	byteStats := computeByteStats(input)	return &recognizerInput{		raw:         raw,		input:       input,		tagStripped: stripped,		byteStats:   byteStats,		hasC1Bytes:  computeHasC1Bytes(byteStats),	}}func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {	const inputBufferSize = 8192	out = make([]byte, 0, inputBufferSize)	var badTags, openTags int32	var inMarkup bool = false	stripped = false	if stripTag {		stripped = true		for _, c := range raw {			if c == '<' {				if inMarkup {					badTags += 1				}				inMarkup = true				openTags += 1			}			if !inMarkup {				out = append(out, c)				if len(out) >= inputBufferSize {					break				}			}			if c == '>' {				inMarkup = false			}		}	}	if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) {		limit := len(raw)		if limit > inputBufferSize {			limit = inputBufferSize		}		out = make([]byte, limit)		copy(out, raw[:limit])		stripped = false	}	return}func computeByteStats(input []byte) []int {	r := make([]int, 256)	for _, c := range input {		r[c] += 1	}	return r}func computeHasC1Bytes(byteStats []int) bool {	for _, count := range byteStats[0x80 : 0x9F+1] {		if count > 0 {			return true		}	}	return false}
 |