11let AWS = require ( 'aws-sdk' ) ;
2+ let parser = require ( "mailparser" ) . simpleParser ;
3+
4+ AWS . config . logger = console ;
25
36//
47// Initialize S3.
@@ -17,21 +20,29 @@ exports.handler = (event) => {
1720 // 1. This JS object will contain all the data within the chain.
1821 //
1922 let container = {
20- from : event . Records [ 0 ] . ses . mail . commonHeaders . from [ 0 ] ,
21- to : event . Records [ 0 ] . ses . mail . commonHeaders . to [ 0 ] ,
22- subject : event . Records [ 0 ] . ses . mail . commonHeaders . subject ,
23- date : event . Records [ 0 ] . ses . mail . commonHeaders . date ,
24- message_id : event . Records [ 0 ] . ses . mail . messageId
25- }
26-
27- console . log ( container ) ;
23+ bucket : event . Records [ 0 ] . s3 . bucket . name ,
24+ unescaped_key : '' ,
25+ escaped_key : event . Records [ 0 ] . s3 . object . key
26+ } ;
2827
2928 //
3029 // -> Start the chain.
3130 //
32- extract_data ( container )
31+ unescape_key ( container )
3332 . then ( function ( container ) {
3433
34+ return load_the_email ( container ) ;
35+
36+ } ) . then ( function ( container ) {
37+
38+ return parse_the_email ( container ) ;
39+
40+ } ) . then ( function ( container ) {
41+
42+ return extract_data ( container ) ;
43+
44+ } ) . then ( function ( container ) {
45+
3546 return copy_the_email ( container ) ;
3647
3748 } ) . then ( function ( container ) {
@@ -60,6 +71,135 @@ exports.handler = (event) => {
6071// |_| |_| \_\ \____/ |_| |_| |_____| |_____/ |______| |_____/
6172//
6273
//
//	The object key delivered in the S3 event is URL-encoded, with one
//	twist: spaces arrive as + characters instead of %20. Before the key
//	can be used in a regular S3 request both encodings have to be undone.
//
//	Reads:		container.escaped_key	- the key as received in the event.
//	Writes:		container.unescaped_key	- the decoded key.
//
//	Resolves with the same container. Rejects with the original error
//	when the key can't be decoded (for example a malformed % sequence).
//
function unescape_key(container)
{
	return new Promise(function(resolve, reject) {

		console.info("unescape_key");

		let unescaped_key;

		try
		{
			//
			//	1.	First we convert the + in to spaces. This has to happen
			//		before the decoding: a real + character arrives as
			//		%2B, and would otherwise be turned in to a space too.
			//
			let plus_to_space = container.escaped_key.replace(/\+/g, ' ');

			//
			//	2.	And then we decode the percent-escaped characters.
			//
			unescaped_key = decodeURIComponent(plus_to_space);
		}
		catch(error)
		{
			//
			//	Log the offending key, the same way the other steps log
			//	their params, so the failure can be traced.
			//
			console.error(container.escaped_key);
			return reject(error);
		}

		//
		//	3.	Save the result for the next promise.
		//
		container.unescaped_key = unescaped_key;

		//
		//	->	Move to the next chain.
		//
		return resolve(container);

	});
}

//
//	Load the raw email that SES saved in S3.
//
//	Reads:		container.bucket, container.unescaped_key
//	Writes:		container.raw_email	- the Body of the S3 object.
//
//	Resolves with the same container. Rejects with the S3 error when
//	the object can't be read.
//
function load_the_email(container)
{
	return new Promise(function(resolve, reject) {

		console.info("load_the_email");

		//
		//	1.	Set the query. The unescaped key is used here, since
		//		getObject expects the real key of the object.
		//
		let params = {
			Bucket: container.bucket,
			Key: container.unescaped_key
		};

		//
		//	->	Execute the query.
		//
		s3.getObject(params, function(error, data) {

			//
			//	1.	Check for internal errors.
			//
			if(error)
			{
				console.error(params);
				return reject(error);
			}

			//
			//	2.	Save the email for the next promise.
			//
			container.raw_email = data.Body;

			//
			//	->	Move to the next chain.
			//
			return resolve(container);

		});

	});
}

//
//	Once the raw email is loaded we parse it with one goal in mind: get
//	the date of the email. This way we don't rely on the SES date, but on
//	the real date the email was created.
//
//	This way we can even load old emails in to the system, as long as they
//	are in the standard raw email format and not some proprietary one.
//
//	That way emails will be organized by the time they were created, and
//	not by the time they were received in to the system.
//
//	Reads:		container.raw_email
//	Writes:		container.date, container.from, container.to,
//				container.subject, container.message_id
//
//	Resolves with the same container. Rejects when the parser reports an
//	error, or when the email has no usable From or To address.
//
function parse_the_email(container)
{
	//
	//	Return the address of the first entry of a parsed address header,
	//	or undefined when the header is missing or has no entries.
	//
	function first_address(field)
	{
		if(!field || !Array.isArray(field.value) || field.value.length === 0)
		{
			return undefined;
		}

		return field.value[0].address;
	}

	return new Promise(function(resolve, reject) {

		console.info("parse_the_email");

		//
		//	1.	Parse the email and extract all the necessary data.
		//
		parser(container.raw_email, function(error, data) {

			//
			//	1.	Check for internal errors.
			//
			if(error)
			{
				console.error(container.unescaped_key);
				return reject(error);
			}

			//
			//	2.	Make sure the addresses are there. An exception thrown
			//		inside this callback would not reject the promise,
			//		so missing headers have to be handled explicitly.
			//
			let from = first_address(data.from);
			let to = first_address(data.to);

			if(!from)
			{
				return reject(new Error("The email has no usable From address."));
			}

			if(!to)
			{
				return reject(new Error("The email has no usable To address."));
			}

			//
			//	3.	Save the parsed email for the next promise.
			//
			container.date = data.date;
			container.from = from;
			container.to = to;
			container.subject = data.subject;
			container.message_id = data.messageId;

			//
			//	->	Move to the next chain.
			//
			return resolve(container);

		});

	});
}

63203//
64204// Extract all the data necessary to organize the incoming emails.
65205//
@@ -70,16 +210,21 @@ function extract_data(container)
70210 console . info ( "extract_data" ) ;
71211
72212 //
73- // 1. Extract all the information
213+ // 1. Since the email string can come in a form of:
214+ //
215+ // Name Last <name@example.com>
216+ //
217+ // We have to extract just the email address, and discard
218+ // the rest.
74219 //
75220 let tmp_to = container
76221 . to
77- . match ( / [ a - z 0 - 9 - + ] { 1 , 30 } @ [ a - z 0 - 9 - ] { 1 , 65 } . [ a - z ] { 1 , } / gm) [ 0 ]
222+ . match ( / (?: [ a - z A - Z 0 - 9 ! # $ % & ' * + / = ? ^ _ ` { | } ~ - ] + (?: \. [ a - z A - Z 0 - 9 ! # $ % & ' * + / = ? ^ _ ` { | } ~ - ] + ) * | " (?: [ \x01 - \x08 \x0b \x0c \x0e - \x1f \x21 \x23 - \x5b \x5d - \x7f ] | \\ [ \x01 - \x09 \x0b \x0c \x0e - \x7f ] ) * " ) @ (?: (?: [ a - z A - Z 0 - 9 ] (?: [ a - z A - Z 0 - 9 - ] * [ a - z A - Z 0 - 9 ] ) ? \. ) + [ a - z A - Z 0 - 9 ] (?: [ a - z A - Z 0 - 9 - ] * [ a - z A - Z 0 - 9 ] ) ? | \[ (?: (?: 2 5 [ 0 - 5 ] | 2 [ 0 - 4 ] [ 0 - 9 ] | [ 0 1 ] ? [ 0 - 9 ] [ 0 - 9 ] ? ) \. ) { 3 } (?: 2 5 [ 0 - 5 ] | 2 [ 0 - 4 ] [ 0 - 9 ] | [ 0 1 ] ? [ 0 - 9 ] [ 0 - 9 ] ? | [ a - z A - Z 0 - 9 - ] * [ a - z A - Z 0 - 9 ] : (?: [ \x01 - \x08 \x0b \x0c \x0e - \x1f \x21 - \x5a \x53 - \x7f ] | \\ [ \x01 - \x09 \x0b \x0c \x0e - \x7f ] ) + ) \] ) / gm) [ 0 ]
78223 . split ( '@' ) ;
79224
80225 let tmp_from = container
81226 . from
82- . match ( / [ a - z 0 - 9 - + ] { 1 , 30 } @ [ a - z 0 - 9 - ] { 1 , 65 } . [ a - z ] { 1 , } / gm) [ 0 ]
227+ . match ( / (?: [ a - z A - Z 0 - 9 ! # $ % & ' * + / = ? ^ _ ` { | } ~ - ] + (?: \. [ a - z A - Z 0 - 9 ! # $ % & ' * + / = ? ^ _ ` { | } ~ - ] + ) * | " (?: [ \x01 - \x08 \x0b \x0c \x0e - \x1f \x21 \x23 - \x5b \x5d - \x7f ] | \\ [ \x01 - \x09 \x0b \x0c \x0e - \x7f ] ) * " ) @ (?: (?: [ a - z A - Z 0 - 9 ] (?: [ a - z A - Z 0 - 9 - ] * [ a - z A - Z 0 - 9 ] ) ? \. ) + [ a - z A - Z 0 - 9 ] (?: [ a - z A - Z 0 - 9 - ] * [ a - z A - Z 0 - 9 ] ) ? | \[ (?: (?: 2 5 [ 0 - 5 ] | 2 [ 0 - 4 ] [ 0 - 9 ] | [ 0 1 ] ? [ 0 - 9 ] [ 0 - 9 ] ? ) \. ) { 3 } (?: 2 5 [ 0 - 5 ] | 2 [ 0 - 4 ] [ 0 - 9 ] | [ 0 1 ] ? [ 0 - 9 ] [ 0 - 9 ] ? | [ a - z A - Z 0 - 9 - ] * [ a - z A - Z 0 - 9 ] : (?: [ \x01 - \x08 \x0b \x0c \x0e - \x1f \x21 - \x5a \x53 - \x7f ] | \\ [ \x01 - \x09 \x0b \x0c \x0e - \x7f ] ) + ) \] ) / gm) [ 0 ]
83228 . split ( '@' ) ;
84229
85230 //
@@ -140,7 +285,13 @@ function extract_data(container)
140285}
141286
142287//
143- // Copy the email to a new location.
//	Copy the email to a new location - we don't upload the email that we
//	already have in memory, since the system requires a COPY action and
//	not a PUT action.
//
//	WARNING:	We are using the escaped_key value, because there is a
//				known bug in the AWS JS SDK which won't unescape the
//				string, so you have to do it yourself - AWS is aware of
//				this issue.
144295//
145296function copy_the_email ( container )
146297{
@@ -152,13 +303,11 @@ function copy_the_email(container)
152303 // 1. Set the query.
153304 //
154305 let params = {
155- Bucket : process . env . BUCKET ,
156- CopySource : process . env . BUCKET + "/TMP/email_in/" + container . message_id ,
306+ Bucket : container . bucket ,
307+ CopySource : container . bucket + '/' + container . escaped_key ,
157308 Key : container . path
158309 } ;
159310
160- console . log ( params ) ;
161-
162311 //
163312 // -> Execute the query.
164313 //
@@ -169,6 +318,7 @@ function copy_the_email(container)
169318 //
170319 if ( error )
171320 {
321+ console . error ( params ) ;
172322 return reject ( error ) ;
173323 }
174324
@@ -195,26 +345,21 @@ function delete_the_email(container)
195345 // 1. Set the query.
196346 //
197347 let params = {
198- Bucket : process . env . BUCKET ,
199- Key : "TMP/email_in/" + container . message_id
348+ Bucket : container . bucket ,
349+ Key : container . unescaped_key
200350 } ;
201351
202- console . log ( params )
203-
204-
205352 //
206353 // -> Execute the query.
207354 //
208355 s3 . deleteObject ( params , function ( error , data ) {
209356
210- console . log ( error )
211- console . log ( data )
212-
213357 //
214358 // 1. Check for internal errors.
215359 //
216360 if ( error )
217361 {
362+ console . error ( params ) ;
218363 return reject ( error ) ;
219364 }
220365
0 commit comments