Skip to content

Commit 9e0c7ff

Browse files
Added a few more encodings
1 parent 32d445d commit 9e0c7ff

File tree

6 files changed

+42
-5
lines changed

6 files changed

+42
-5
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
* Added infrastructure to make it easy to add variable-byte (up to two bytes) encodings and single-byte encodings.
77
* Added the following encodings:
88
* windows-874
9+
* x-mac-ce
910
* x-mac-cyrillic
11+
* x-mac-greek
12+
* x-mac-icelandic
13+
* x-mac-turkish
1014
* Fixed an issue in the save functions where a zip file created by the save function could be left unclosed if an exception was raised afterwards.
1115
* Added new property `AttachmentBase.clsid` which returns the listed CLSID value of the data stream/storage of the attachment.
1216
* Changed internal behavior of `MSGFile.attachments`. This should not cause any noticeable changes to the output.

extract_msg/encoding/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@
9696
10004: 'x-mac-arabic', # Arabic (Mac)
9797
# UNSUPPORTED.
9898
10005: 'x-mac-hebrew', # Hebrew (Mac)
99-
# UNSUPPORTED.
10099
10006: 'x-mac-greek', # Greek (Mac)
101100
10007: 'x-mac-cyrillic', # Cyrillic (Mac)
102101
# UNSUPPORTED.
@@ -107,11 +106,8 @@
107106
10017: 'x-mac-ukrainian', # Ukrainian (Mac)
108107
# UNSUPPORTED.
109108
10021: 'x-mac-thai', # Thai (Mac)
110-
# UNSUPPORTED.
111109
10029: 'x-mac-ce', # MAC Latin 2; Central European (Mac)
112-
# UNSUPPORTED.
113110
10079: 'x-mac-icelandic', # Icelandic (Mac)
114-
# UNSUPPORTED.
115111
10081: 'x-mac-turkish', # Turkish (Mac)
116112
# UNSUPPORTED.
117113
10082: 'x-mac-croatian', # Croatian (Mac)
@@ -264,11 +260,16 @@ def _lookupEncoding(name):
264260

265261
from .utils import createSBEncoding as _sb, createVBEncoding as _vb
266262
from ._dt import (
267-
_mac_cyrillic, _win874_dec, _win950_dec
263+
_mac_ce, _mac_cyrillic, _mac_greek, _mac_iceland, _mac_turkish,
264+
_win874_dec, _win950_dec
268265
)
269266

270267
_codecsInfo = {
268+
'x_mac_ce': _sb('x-mac-ce', _mac_ce.decodingTable),
271269
'x_mac_cyrillic': _sb('x-mac-cyrillic', _mac_cyrillic.decodingTable),
270+
'x_mac_greek': _sb('x-mac-greek', _mac_greek.decodingTable),
271+
'x_mac_icelandic': _sb('x-mac-icelandic', _mac_iceland.decodingTable),
272+
'x_mac_turkish': _sb('x-mac-turkish', _mac_turkish.decodingTable),
272273
'windows_950': _vb('windows-950', _win950_dec.decodingTable),
273274
'windows_874': _sb('windows-874', _win874_dec.decodingTable),
274275
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT
2+
3+
__all__ = [
4+
'decodingTable',
5+
]
6+
7+
8+
# Single-byte decoding table for x-mac-ce: maps each byte value (int) to the
# one-character string it decodes to. All 256 byte values have an entry.
decodingTable = {
    # Bytes 0-127 decode to the identical ASCII code point.
    **{code: chr(code) for code in range(128)},
    # Bytes 128-255 carry the encoding-specific characters.
    128: '\xC4', 129: '\u0100', 130: '\u0101', 131: '\xC9', 132: '\u0104', 133: '\xD6', 134: '\xDC', 135: '\xE1',
    136: '\u0105', 137: '\u010C', 138: '\xE4', 139: '\u010D', 140: '\u0106', 141: '\u0107', 142: '\xE9', 143: '\u0179',
    144: '\u017A', 145: '\u010E', 146: '\xED', 147: '\u010F', 148: '\u0112', 149: '\u0113', 150: '\u0116', 151: '\xF3',
    152: '\u0117', 153: '\xF4', 154: '\xF6', 155: '\xF5', 156: '\xFA', 157: '\u011A', 158: '\u011B', 159: '\xFC',
    160: '\u2020', 161: '\xB0', 162: '\u0118', 163: '\xA3', 164: '\xA7', 165: '\u2022', 166: '\xB6', 167: '\xDF',
    168: '\xAE', 169: '\xA9', 170: '\u2122', 171: '\u0119', 172: '\xA8', 173: '\u2260', 174: '\u0123', 175: '\u012E',
    176: '\u012F', 177: '\u012A', 178: '\u2264', 179: '\u2265', 180: '\u012B', 181: '\u0136', 182: '\u2202', 183: '\u2211',
    184: '\u0142', 185: '\u013B', 186: '\u013C', 187: '\u013D', 188: '\u013E', 189: '\u0139', 190: '\u013A', 191: '\u0145',
    192: '\u0146', 193: '\u0143', 194: '\xAC', 195: '\u221A', 196: '\u0144', 197: '\u0147', 198: '\u2206', 199: '\xAB',
    200: '\xBB', 201: '\u2026', 202: '\xA0', 203: '\u0148', 204: '\u0150', 205: '\xD5', 206: '\u0151', 207: '\u014C',
    208: '\u2013', 209: '\u2014', 210: '\u201C', 211: '\u201D', 212: '\u2018', 213: '\u2019', 214: '\xF7', 215: '\u25CA',
    216: '\u014D', 217: '\u0154', 218: '\u0155', 219: '\u0158', 220: '\u2039', 221: '\u203A', 222: '\u0159', 223: '\u0156',
    224: '\u0157', 225: '\u0160', 226: '\u201A', 227: '\u201E', 228: '\u0161', 229: '\u015A', 230: '\u015B', 231: '\xC1',
    232: '\u0164', 233: '\u0165', 234: '\xCD', 235: '\u017D', 236: '\u017E', 237: '\u016A', 238: '\xD3', 239: '\xD4',
    240: '\u016B', 241: '\u016E', 242: '\xDA', 243: '\u016F', 244: '\u0170', 245: '\u0171', 246: '\u0172', 247: '\u0173',
    248: '\xDD', 249: '\xFD', 250: '\u0137', 251: '\u017B', 252: '\u0141', 253: '\u017C', 254: '\u0122', 255: '\u02C7',
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT
2+
3+
__all__ = [
4+
'decodingTable',
5+
]
6+
7+
8+
# Single-byte decoding table for x-mac-greek: maps each byte value (int) to
# the one-character string it decodes to.
# Byte 255 has no entry. NOTE(review): presumably it is undefined in the
# referenced mapping file -- confirm how the codec treats unmapped bytes.
decodingTable = {
    # Bytes 0-127 decode to the identical ASCII code point.
    **{code: chr(code) for code in range(128)},
    # Bytes 128-254 carry the encoding-specific characters.
    128: '\xC4', 129: '\xB9', 130: '\xB2', 131: '\xC9', 132: '\xB3', 133: '\xD6', 134: '\xDC', 135: '\u0385',
    136: '\xE0', 137: '\xE2', 138: '\xE4', 139: '\u0384', 140: '\xA8', 141: '\xE7', 142: '\xE9', 143: '\xE8',
    144: '\xEA', 145: '\xEB', 146: '\xA3', 147: '\u2122', 148: '\xEE', 149: '\xEF', 150: '\u2022', 151: '\xBD',
    152: '\u2030', 153: '\xF4', 154: '\xF6', 155: '\xA6', 156: '\xAD', 157: '\xF9', 158: '\xFB', 159: '\xFC',
    160: '\u2020', 161: '\u0393', 162: '\u0394', 163: '\u0398', 164: '\u039B', 165: '\u039E', 166: '\u03A0', 167: '\xDF',
    168: '\xAE', 169: '\xA9', 170: '\u03A3', 171: '\u03AA', 172: '\xA7', 173: '\u2260', 174: '\xB0', 175: '\u0387',
    176: '\u0391', 177: '\xB1', 178: '\u2264', 179: '\u2265', 180: '\xA5', 181: '\u0392', 182: '\u0395', 183: '\u0396',
    184: '\u0397', 185: '\u0399', 186: '\u039A', 187: '\u039C', 188: '\u03A6', 189: '\u03AB', 190: '\u03A8', 191: '\u03A9',
    192: '\u03AC', 193: '\u039D', 194: '\xAC', 195: '\u039F', 196: '\u03A1', 197: '\u2248', 198: '\u03A4', 199: '\xAB',
    200: '\xBB', 201: '\u2026', 202: '\xA0', 203: '\u03A5', 204: '\u03A7', 205: '\u0386', 206: '\u0388', 207: '\u0153',
    208: '\u2013', 209: '\u2015', 210: '\u201C', 211: '\u201D', 212: '\u2018', 213: '\u2019', 214: '\xF7', 215: '\u0389',
    216: '\u038A', 217: '\u038C', 218: '\u038E', 219: '\u03AD', 220: '\u03AE', 221: '\u03AF', 222: '\u03CC', 223: '\u038F',
    224: '\u03CD', 225: '\u03B1', 226: '\u03B2', 227: '\u03C8', 228: '\u03B4', 229: '\u03B5', 230: '\u03C6', 231: '\u03B3',
    232: '\u03B7', 233: '\u03B9', 234: '\u03BE', 235: '\u03BA', 236: '\u03BB', 237: '\u03BC', 238: '\u03BD', 239: '\u03BF',
    240: '\u03C0', 241: '\u03CE', 242: '\u03C1', 243: '\u03C3', 244: '\u03C4', 245: '\u03B8', 246: '\u03C9', 247: '\u03C2',
    248: '\u03C7', 249: '\u03C5', 250: '\u03B6', 251: '\u03CA', 252: '\u03CB', 253: '\u0390', 254: '\u03B0',
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT
2+
3+
__all__ = [
4+
'decodingTable',
5+
]
6+
7+
8+
# Single-byte decoding table for x-mac-icelandic: maps each byte value (int)
# to the one-character string it decodes to.
# Byte 240 has no entry. NOTE(review): presumably it is undefined in the
# referenced mapping file -- confirm how the codec treats unmapped bytes.
#
# The string literal for byte 212 ('\u2018') is kept on a single line: a line
# break inside the escape sequence makes the whole module fail to parse.
decodingTable = {
    # Bytes 0-127 decode to the identical ASCII code point.
    **{code: chr(code) for code in range(128)},
    # Bytes 128-255 carry the encoding-specific characters.
    128: '\xC4', 129: '\xC5', 130: '\xC7', 131: '\xC9', 132: '\xD1', 133: '\xD6', 134: '\xDC', 135: '\xE1',
    136: '\xE0', 137: '\xE2', 138: '\xE4', 139: '\xE3', 140: '\xE5', 141: '\xE7', 142: '\xE9', 143: '\xE8',
    144: '\xEA', 145: '\xEB', 146: '\xED', 147: '\xEC', 148: '\xEE', 149: '\xEF', 150: '\xF1', 151: '\xF3',
    152: '\xF2', 153: '\xF4', 154: '\xF6', 155: '\xF5', 156: '\xFA', 157: '\xF9', 158: '\xFB', 159: '\xFC',
    160: '\xDD', 161: '\xB0', 162: '\xA2', 163: '\xA3', 164: '\xA7', 165: '\u2022', 166: '\xB6', 167: '\xDF',
    168: '\xAE', 169: '\xA9', 170: '\u2122', 171: '\xB4', 172: '\xA8', 173: '\u2260', 174: '\xC6', 175: '\xD8',
    176: '\u221E', 177: '\xB1', 178: '\u2264', 179: '\u2265', 180: '\xA5', 181: '\xB5', 182: '\u2202', 183: '\u2211',
    184: '\u220F', 185: '\u03C0', 186: '\u222B', 187: '\xAA', 188: '\xBA', 189: '\u2126', 190: '\xE6', 191: '\xF8',
    192: '\xBF', 193: '\xA1', 194: '\xAC', 195: '\u221A', 196: '\u0192', 197: '\u2248', 198: '\u2206', 199: '\xAB',
    200: '\xBB', 201: '\u2026', 202: '\xA0', 203: '\xC0', 204: '\xC3', 205: '\xD5', 206: '\u0152', 207: '\u0153',
    208: '\u2013', 209: '\u2014', 210: '\u201C', 211: '\u201D', 212: '\u2018', 213: '\u2019', 214: '\xF7', 215: '\u25CA',
    216: '\xFF', 217: '\u0178', 218: '\u2044', 219: '\xA4', 220: '\xD0', 221: '\xF0', 222: '\xDE', 223: '\xFE',
    224: '\xFD', 225: '\xB7', 226: '\u201A', 227: '\u201E', 228: '\u2030', 229: '\xC2', 230: '\xCA', 231: '\xC1',
    232: '\xCB', 233: '\xC8', 234: '\xCD', 235: '\xCE', 236: '\xCF', 237: '\xCC', 238: '\xD3', 239: '\xD4',
    241: '\xD2', 242: '\xDA', 243: '\xDB', 244: '\xD9', 245: '\u0131', 246: '\u02C6', 247: '\u02DC',
    248: '\xAF', 249: '\u02D8', 250: '\u02D9', 251: '\u02DA', 252: '\xB8', 253: '\u02DD', 254: '\u02DB', 255: '\u02C7',
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Based on https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT
2+
3+
__all__ = [
4+
'decodingTable',
5+
]
6+
7+
8+
# Single-byte decoding table for x-mac-turkish: maps each byte value (int) to
# the one-character string it decodes to.
# Bytes 240 and 245 have no entry. NOTE(review): presumably they are undefined
# in the referenced mapping file -- confirm how the codec treats unmapped bytes.
#
# The string literal for byte 212 ('\u2018') is kept on a single line: a line
# break inside the quotes leaves the literal unterminated and the module
# fails to parse.
decodingTable = {
    # Bytes 0-127 decode to the identical ASCII code point.
    **{code: chr(code) for code in range(128)},
    # Bytes 128-255 carry the encoding-specific characters.
    128: '\xC4', 129: '\xC5', 130: '\xC7', 131: '\xC9', 132: '\xD1', 133: '\xD6', 134: '\xDC', 135: '\xE1',
    136: '\xE0', 137: '\xE2', 138: '\xE4', 139: '\xE3', 140: '\xE5', 141: '\xE7', 142: '\xE9', 143: '\xE8',
    144: '\xEA', 145: '\xEB', 146: '\xED', 147: '\xEC', 148: '\xEE', 149: '\xEF', 150: '\xF1', 151: '\xF3',
    152: '\xF2', 153: '\xF4', 154: '\xF6', 155: '\xF5', 156: '\xFA', 157: '\xF9', 158: '\xFB', 159: '\xFC',
    160: '\u2020', 161: '\xB0', 162: '\xA2', 163: '\xA3', 164: '\xA7', 165: '\u2022', 166: '\xB6', 167: '\xDF',
    168: '\xAE', 169: '\xA9', 170: '\u2122', 171: '\xB4', 172: '\xA8', 173: '\u2260', 174: '\xC6', 175: '\xD8',
    176: '\u221E', 177: '\xB1', 178: '\u2264', 179: '\u2265', 180: '\xA5', 181: '\xB5', 182: '\u2202', 183: '\u2211',
    184: '\u220F', 185: '\u03C0', 186: '\u222B', 187: '\xAA', 188: '\xBA', 189: '\u2126', 190: '\xE6', 191: '\xF8',
    192: '\xBF', 193: '\xA1', 194: '\xAC', 195: '\u221A', 196: '\u0192', 197: '\u2248', 198: '\u2206', 199: '\xAB',
    200: '\xBB', 201: '\u2026', 202: '\xA0', 203: '\xC0', 204: '\xC3', 205: '\xD5', 206: '\u0152', 207: '\u0153',
    208: '\u2013', 209: '\u2014', 210: '\u201C', 211: '\u201D', 212: '\u2018', 213: '\u2019', 214: '\xF7', 215: '\u25CA',
    216: '\xFF', 217: '\u0178', 218: '\u011E', 219: '\u011F', 220: '\u0130', 221: '\u0131', 222: '\u015E', 223: '\u015F',
    224: '\u2021', 225: '\xB7', 226: '\u201A', 227: '\u201E', 228: '\u2030', 229: '\xC2', 230: '\xCA', 231: '\xC1',
    232: '\xCB', 233: '\xC8', 234: '\xCD', 235: '\xCE', 236: '\xCF', 237: '\xCC', 238: '\xD3', 239: '\xD4',
    241: '\xD2', 242: '\xDA', 243: '\xDB', 244: '\xD9', 246: '\u02C6', 247: '\u02DC',
    248: '\xAF', 249: '\u02D8', 250: '\u02D9', 251: '\u02DA', 252: '\xB8', 253: '\u02DD', 254: '\u02DB', 255: '\u02C7',
}

0 commit comments

Comments
 (0)