1+ // Package chardet ports character set detection from ICU.
12package chardet
23
34import (
45 "errors"
56 "sort"
67)
78
// Result describes one charset detection outcome reported by a Detector.
type Result struct {
	// Charset is the IANA name of the detected charset.
	Charset string
	// Language is the IANA name of the detected language. It may be empty
	// for some charsets.
	Language string
	// Confidence is how sure the detector is of this Result, on a scale
	// from 1 to 100. The bigger the value, the more confident.
	Confidence int
}
1318
19+ // Detector implements charset detection.
1420type Detector struct {
1521 recognizers []recognizer
1622 stripTag bool
@@ -63,10 +69,12 @@ var recognizers = []recognizer{
6369 newRecognizer_IBM420_ar_ltr (),
6470}
6571
72+ // NewTextDetector creates a Detector for plain text.
6673func NewTextDetector () * Detector {
6774 return & Detector {recognizers , false }
6875}
6976
77+ // NewHtmlDetector creates a Detector for Html.
7078func NewHtmlDetector () * Detector {
7179 return & Detector {recognizers , true }
7280}
7583 NotDetectedError = errors .New ("Charset not detected." )
7684)
7785
86+ // DetectBest returns the Result with the highest Confidence.
7887func (d * Detector ) DetectBest (b []byte ) (r * Result , err error ) {
7988 var all []Result
8089 if all , err = d .DetectAll (b ); err == nil {
@@ -83,6 +92,7 @@ func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
8392 return
8493}
8594
95+ // DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order.
8696func (d * Detector ) DetectAll (b []byte ) ([]Result , error ) {
8797 input := newRecognizerInput (b , d .stripTag )
8898 outputChan := make (chan recognizerOutput )
0 commit comments