Skip to content

Commit eb1e2b7

Browse files
committed
Finalize interface for detector
1 parent ae559a9 commit eb1e2b7

File tree

3 files changed

+16
-13
lines changed

3 files changed

+16
-13
lines changed

base_test.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,13 @@ type chardetTester struct {
1010

1111
func newChardetTester(r ...recognizer) *chardetTester {
1212
if len(r) == 0 {
13-
return &chardetTester{NewDetector()}
13+
return &chardetTester{NewHtmlDetector()}
1414
}
15-
return &chardetTester{&Detector{r}}
15+
return &chardetTester{&Detector{r, true}}
1616
}
1717

1818
func (this *chardetTester) ExpectBest(b []byte, charset string, lang string, t *testing.T) bool {
19-
r, err := this.d.DetectBest(b, true, "")
19+
r, err := this.d.DetectBest(b)
2020
if err != nil {
2121
t.Error(err)
2222
return false
@@ -29,7 +29,7 @@ func (this *chardetTester) ExpectBest(b []byte, charset string, lang string, t *
2929
}
3030

3131
func (this *chardetTester) ExpectUnknown(b []byte, t *testing.T) bool {
32-
r, err := this.d.DetectBest(b, true, "")
32+
r, err := this.d.DetectBest(b)
3333
if err == nil {
3434
t.Errorf("Expect unknown, actual %#v", *r)
3535
return false

detector.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ type Result struct {
1313

1414
type Detector struct {
1515
recognizers []recognizer
16+
stripTag bool
1617
}
1718

1819
// List of charset recognizers
@@ -62,24 +63,28 @@ var recognizers = []recognizer{
6263
newRecognizer_IBM420_ar_ltr(),
6364
}
6465

65-
func NewDetector() *Detector {
66-
return &Detector{recognizers}
66+
func NewTextDetector() *Detector {
67+
return &Detector{recognizers, false}
68+
}
69+
70+
func NewHtmlDetector() *Detector {
71+
return &Detector{recognizers, true}
6772
}
6873

6974
var (
7075
NotDetectedError = errors.New("Charset not detected.")
7176
)
7277

73-
func (d *Detector) DetectBest(b []byte, stripTag bool, declaredCharset string) (r *Result, err error) {
78+
func (d *Detector) DetectBest(b []byte) (r *Result, err error) {
7479
var all []Result
75-
if all, err = d.DetectAll(b, stripTag, declaredCharset); err == nil {
80+
if all, err = d.DetectAll(b); err == nil {
7681
r = &all[0]
7782
}
7883
return
7984
}
8085

81-
func (d *Detector) DetectAll(b []byte, stripTag bool, declaredCharset string) ([]Result, error) {
82-
input := newRecognizerInput(b, stripTag, declaredCharset)
86+
func (d *Detector) DetectAll(b []byte) ([]Result, error) {
87+
input := newRecognizerInput(b, d.stripTag)
8388
outputChan := make(chan recognizerOutput)
8489
for _, r := range d.recognizers {
8590
go matchHelper(r, input, outputChan)

recognizer.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,17 @@ type recognizerInput struct {
1010
raw []byte
1111
input []byte
1212
tagStripped bool
13-
declaredCharset string
1413
byteStats []int
1514
hasC1Bytes bool
1615
}
1716

18-
func newRecognizerInput(raw []byte, stripTag bool, declaredCharset string) *recognizerInput {
17+
func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
1918
input, stripped := mayStripInput(raw, stripTag)
2019
byteStats := computeByteStats(input)
2120
return &recognizerInput{
2221
raw: raw,
2322
input: input,
2423
tagStripped: stripped,
25-
declaredCharset: declaredCharset,
2624
byteStats: byteStats,
2725
hasC1Bytes: computeHasC1Bytes(byteStats),
2826
}

0 commit comments

Comments
 (0)