Skip to content

Commit a9fae7a

Browse files
axllent and jhillyerd
authored
feat: add option to disable character detection (#342)
* feat: add option to disable character detection Resolves #340 --------- Signed-off-by: James Hillyerd <james@hillyerd.com> Co-authored-by: James Hillyerd <james@hillyerd.com>
1 parent 16957e7 commit a9fae7a

File tree

5 files changed

+48
-1
lines changed

5 files changed

+48
-1
lines changed

options.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,15 @@ func (o disableTextConversionOption) apply(p *Parser) {
100100
func DisableTextConversion(disableTextConversion bool) Option {
101101
return disableTextConversionOption(disableTextConversion)
102102
}
103+
104+
type disableCharacterDetectionOption bool
105+
106+
func (o disableCharacterDetectionOption) apply(p *Parser) {
107+
p.disableCharacterDetection = bool(o)
108+
}
109+
110+
// DisableCharacterDetection returns an Option that sets the parser's disableCharacterDetection flag. When true, a part
111+
// that declares a character set is converted from that declared set, bypassing the detected one; parts declaring none are unaffected.
112+
func DisableCharacterDetection(disableCharacterDetection bool) Option {
113+
return disableCharacterDetectionOption(disableCharacterDetection)
114+
}

parser.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ type Parser struct {
2727
customParseMediaType CustomParseMediaType
2828
stripMediaTypeInvalidCharacters bool
2929
disableTextConversion bool
30+
disableCharacterDetection bool
3031
}
3132

3233
// defaultParser is a Parser with default configuration.

part.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,8 @@ func (p *Part) convertFromDetectedCharset(r io.Reader, readPartErrorPolicy ReadP
210210
// Restore r.
211211
r = bytes.NewReader(buf)
212212

213-
if cs == nil || cs.Confidence < minCharsetConfidence || len(bytes.Runes(buf)) < minCharsetRuneLength {
213+
if (p.parser.disableCharacterDetection && p.Charset != "") ||
214+
(cs == nil || cs.Confidence < minCharsetConfidence || len(bytes.Runes(buf)) < minCharsetRuneLength) {
214215
// Detection disabled with a declared charset, low confidence, or not enough characters: use declared character set.
215216
return p.convertFromStatedCharset(r), nil
216217
}

part_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,3 +1318,28 @@ func TestCtypeInvalidCharacters(t *testing.T) {
13181318

13191319
test.ComparePart(t, p, wantp)
13201320
}
1321+
1322+
func TestDisableCharacterDetectionPart(t *testing.T) {
1323+
var wantp *enmime.Part
1324+
1325+
// chardet considers this test file to be ISO-8859-1; with detection disabled the expected Charset below is utf-8 instead.
1326+
r := test.OpenTestData("parts", "chardet-detection.raw")
1327+
parser := enmime.NewParser(enmime.DisableCharacterDetection(true))
1328+
p, err := parser.ReadParts(r)
1329+
1330+
// Examine root: parsing must succeed and return a non-nil part.
1331+
if err != nil {
1332+
t.Fatalf("Unexpected parse error: %+v", err)
1333+
}
1334+
if p == nil {
1335+
t.Fatal("Root node should not be nil")
1336+
}
1337+
1338+
wantp = &enmime.Part{
1339+
ContentType: "text/plain",
1340+
PartID: "0",
1341+
Charset: "utf-8",
1342+
}
1343+
1344+
test.ComparePart(t, p, wantp)
1345+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Content-Type: text/plain; charset=utf-8
2+
Content-Transfer-Encoding: quoted-printable
3+
4+
Loggen Sie sich ein, um die Einladung zu akzeptieren oder geben Sie den fol=
5+
gen1233
6+
7+
Nachricht:
8+
=C3=B6o=C3=B6o

0 commit comments

Comments
 (0)