diff --git a/lib/scripts/msg/msg-parser.ts b/lib/scripts/msg/msg-parser.ts index a2e52e9..36e3f5d 100644 --- a/lib/scripts/msg/msg-parser.ts +++ b/lib/scripts/msg/msg-parser.ts @@ -1,9 +1,9 @@ import { CompoundFile } from "./compound-file/compound-file"; import { TEXT_DECODER } from "./compound-file/constants/text-decoder"; import type { DirectoryEntry } from "./compound-file/directory/types/directory-entry"; -import { ATTACH_PROPERTIES, PropertySource, RECIP_PROPERTIES, ROOT_PROPERTIES, type Property } from "./streams/property/properties"; +import { ATTACH_PROPERTIES, CODEPAGE_PROPERTY, CODEPAGES, PropertySource, RECIP_PROPERTIES, ROOT_PROPERTIES, type Property } from "./streams/property/properties"; import { getPropertyStreamEntry } from "./streams/property/property-stream"; -import { PtypBinary, PtypObject, PtypString, PtypTime, type PropertyType } from "./streams/property/property-types"; +import { PtypBinary, PtypObject, PtypString, PtypString8, PtypTime, type PropertyType } from "./streams/property/property-types"; import type { PropertyStreamEntry } from "./streams/property/types/property-stream-entry"; import type { Attachment, Message, MessageContent, Recipient } from "./types/message"; @@ -26,7 +26,8 @@ export function parseDir(file: CompoundFile, dir: DirectoryEntry): Message { } function getContent(file: CompoundFile, dir: DirectoryEntry, pStreamEntry: PropertyStreamEntry): MessageContent { - return getValue(file, ROOT_PROPERTIES, dir, pStreamEntry); + const codepage = getCodepage(file, dir, pStreamEntry); + return getValue(file, ROOT_PROPERTIES, dir, pStreamEntry, codepage); } function getRecipients(file: CompoundFile, dir: DirectoryEntry): Recipient[] { @@ -51,13 +52,21 @@ function getValues(file: CompoundFile, dir: DirectoryEntry, properties: Prope return list; } -function getValue(file: CompoundFile, properties: Property[], dir: DirectoryEntry, entry: PropertyStreamEntry): T { +function getCodepage(file: CompoundFile, dir: DirectoryEntry, entry: 
PropertyStreamEntry): number | undefined { + return getValue<{ codepage: number | undefined }>(file, [CODEPAGE_PROPERTY], dir, entry).codepage; +} + +function getValue(file: CompoundFile, properties: Property[], dir: DirectoryEntry, entry: PropertyStreamEntry, codepage?: number): T { return properties.reduce((acc, p) => { if (p.source == PropertySource.Stream) { - const streamName = `__substg1.0_${p.id.padStart(4, "0")}${p.type.id.toString(16).padStart(4, "0")}`; - const entry = file.directory.get(streamName, dir.childId, false); - if (!entry) return acc; - acc[p.name as keyof T] = getValueFromStream(file, entry, p.type) as T[keyof T]; + for (const ptype of p.types) { + const streamName = `__substg1.0_${p.id.padStart(4, "0")}${ptype.id.toString(16).padStart(4, "0")}`; + const entry = file.directory.get(streamName, dir.childId, false); + if (entry) { + acc[p.name as keyof T] = getValueFromStream(file, entry, ptype, codepage) as T[keyof T]; + break; + } + } } else { const value = getValueFromProperty(entry, p); if (!value) return acc; @@ -72,7 +81,7 @@ function getValueFromProperty(entry: PropertyStreamEntry, property: Property) { const value = entry.data.get(property.id.toLowerCase())?.valueOrSize; if (!value) return ""; - switch (property.type) { + switch (property.types[0]) { case PtypTime: { // Subtracting the number of seconds between January 1, 1601 and January 1, 1970. 
return new Date(Number(value as bigint / 10000n) - 1.16444736e13); @@ -81,7 +90,7 @@ function getValueFromProperty(entry: PropertyStreamEntry, property: Property) { } } -function getValueFromStream(file: CompoundFile, entry: DirectoryEntry, type: PropertyType) { +function getValueFromStream(file: CompoundFile, entry: DirectoryEntry, type: PropertyType, codepage?: number): string | DataView | DirectoryEntry | null { switch (type) { case PtypString: { let value = ""; @@ -91,6 +100,16 @@ function getValueFromStream(file: CompoundFile, entry: DirectoryEntry, type: Pro return value; }; + case PtypString8: { + const decoder = new TextDecoder(CODEPAGES.get(codepage || 65001)); + let value = ""; + file.readStream(entry, (offset, bytes) => { + value += decoder.decode(new DataView(file.view.buffer, offset, bytes), { stream: true }); + }); + value += decoder.decode(); + + return value; + }; case PtypBinary: { const chunks = new Uint8Array(Number(entry.streamSize)); let pos = 0; diff --git a/lib/scripts/msg/streams/property/properties.ts b/lib/scripts/msg/streams/property/properties.ts index 8b920c9..820ac96 100644 --- a/lib/scripts/msg/streams/property/properties.ts +++ b/lib/scripts/msg/streams/property/properties.ts @@ -1,41 +1,84 @@ -import { PtypBinary, PtypObject, PtypString, PtypTime, type PropertyType } from "./property-types"; +import { PtypBinary, PtypObject, PtypString, PtypString8, PtypTime, PtypInteger32, type PropertyType } from "./property-types"; export const enum PropertySource { Stream, // Property can be found in a dedicated stream Property // Property is located in property stream } +export const CODEPAGE_PROPERTY: Property = { id: "3FDE", name:"codepage", types: [PtypInteger32], source: PropertySource.Property }; + +export const CODEPAGES = new Map([ + [874, "windows-874"], + [932, "shift_jis"], + [936, "gb2312"], + [949, "euc-kr"], + [950, "big5"], + [1200, "utf-16"], + [1201, "utf-16be"], + [1250, "windows-1250"], + [1251, "windows-1251"], + [1252, "windows-1252"], + [1253, "windows-1253"], + [1254, "windows-1254"], + 
[1255, "windows-1255"], + [1256, "windows-1256"], + [1257, "windows-1257"], + [1258, "windows-1258"], + [20127, "us-ascii"], + [20866, "koi8-r"], + [21866, "koi8-u"], + [28591, "iso-8859-1"], + [28592, "iso-8859-2"], + [28593, "iso-8859-3"], + [28594, "iso-8859-4"], + [28595, "iso-8859-5"], + [28596, "iso-8859-6"], + [28597, "iso-8859-7"], + [28598, "iso-8859-8"], + [28599, "iso-8859-9"], + [28603, "iso-8859-13"], + [28604, "iso-8859-14"], + [28605, "iso-8859-15"], + [28606, "iso-8859-16"], + [50220, "iso-2022-jp"], + [50221, "csISO2022JP"], + [51932, "euc-jp"], + [51949, "euc-kr"], + [52936, "gb_2312"], + [65001, "utf-8"], +]); + export const ROOT_PROPERTIES: Property[] = [ - { id: "0E06", name:"date", type: PtypTime, source: PropertySource.Property }, - { id: "0037", name:"subject", type: PtypString, source: PropertySource.Stream }, - { id: "0c1a", name:"senderName", type: PtypString, source: PropertySource.Stream }, - { id: "5d02", name:"senderEmail", type: PtypString, source: PropertySource.Stream }, - { id: "1000", name:"body", type: PtypString, source: PropertySource.Stream }, - { id: "1013", name:"bodyHTML", type: PtypString, source: PropertySource.Stream }, - { id: "1009", name:"bodyRTF", type: PtypBinary, source: PropertySource.Stream }, - { id: "007d", name:"headers", type: PtypString, source: PropertySource.Stream }, - { id: "0E04", name:"toRecipients", type: PtypString, source: PropertySource.Stream }, - { id: "0E03", name:"ccRecipients", type: PtypString, source: PropertySource.Stream }, + { id: "0E06", name:"date", types: [PtypTime], source: PropertySource.Property }, + { id: "0037", name:"subject", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "0c1a", name:"senderName", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "0c1f", name:"senderEmail", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "1000", name:"body", types: [PtypString, PtypString8], source: 
PropertySource.Stream }, + { id: "1013", name:"bodyHTML", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "1009", name:"bodyRTF", types: [PtypBinary, PtypString8], source: PropertySource.Stream }, + { id: "007d", name:"headers", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "0E04", name:"toRecipients", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "0E03", name:"ccRecipients", types: [PtypString, PtypString8], source: PropertySource.Stream }, ]; export const ATTACH_PROPERTIES: Property[]= [ - { id: "3703", name:"extension", type: PtypString, source: PropertySource.Stream }, - { id: "3707", name:"fileName", type: PtypString, source: PropertySource.Stream }, - { id: "370e", name:"mimeType", type: PtypString, source: PropertySource.Stream }, - { id: "3A0C", name:"language", type: PtypString, source: PropertySource.Stream }, - { id: "3001", name:"displayName", type: PtypString, source: PropertySource.Stream }, - { id: "3701", name:"content", type: PtypBinary, source: PropertySource.Stream }, - { id: "3701", name:"embeddedMsgObj", type: PtypObject, source: PropertySource.Stream }, + { id: "3703", name:"extension", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "3707", name:"fileName", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "370e", name:"mimeType", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "3A0C", name:"language", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "3001", name:"displayName", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "3701", name:"content", types: [PtypBinary], source: PropertySource.Stream }, + { id: "3701", name:"embeddedMsgObj", types: [PtypObject], source: PropertySource.Stream }, ]; export const RECIP_PROPERTIES: Property[] = [ - { id: "3001", name:"name", type: PtypString, source: PropertySource.Stream }, - { id: 
"39fe", name:"email", type: PtypString, source: PropertySource.Stream }, + { id: "3001", name:"name", types: [PtypString, PtypString8], source: PropertySource.Stream }, + { id: "39fe", name:"email", types: [PtypString, PtypString8], source: PropertySource.Stream }, ]; export interface Property { id: string, name: string, - type: PropertyType, + types: PropertyType[], source: PropertySource, }