1
+ // Package chardet ports character set detection from ICU.
1
2
package chardet
2
3
3
4
import (
4
5
"errors"
5
6
"sort"
6
7
)
7
8
9
// Result describes one charset detection candidate: the charset, the
// language (when known), and how confident the detector is in the match.
type Result struct {
	// Charset is the IANA name of the detected charset.
	Charset string
	// Language is the IANA name of the detected language. It may be empty
	// for some charsets.
	Language string
	// Confidence is the confidence of the Result on a scale from 1 to 100.
	// The bigger, the more confident.
	Confidence int
}
13
18
19
+ // Detector implements charset detection.
14
20
type Detector struct {
15
21
recognizers []recognizer
16
22
stripTag bool
@@ -63,10 +69,12 @@ var recognizers = []recognizer{
63
69
newRecognizer_IBM420_ar_ltr (),
64
70
}
65
71
72
+ // NewTextDetector creates a Detector for plain text.
66
73
func NewTextDetector () * Detector {
67
74
return & Detector {recognizers , false }
68
75
}
69
76
77
+ // NewHtmlDetector creates a Detector for Html.
70
78
func NewHtmlDetector () * Detector {
71
79
return & Detector {recognizers , true }
72
80
}
75
83
NotDetectedError = errors .New ("Charset not detected." )
76
84
)
77
85
86
+ // DetectBest returns the Result with the highest Confidence.
78
87
func (d * Detector ) DetectBest (b []byte ) (r * Result , err error ) {
79
88
var all []Result
80
89
if all , err = d .DetectAll (b ); err == nil {
@@ -83,6 +92,7 @@ func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
83
92
return
84
93
}
85
94
95
+ // DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order.
86
96
func (d * Detector ) DetectAll (b []byte ) ([]Result , error ) {
87
97
input := newRecognizerInput (b , d .stripTag )
88
98
outputChan := make (chan recognizerOutput )
0 commit comments