@@ -7,177 +7,20 @@ import {Logger} from 'pino';
77import { constructEvent , EventOfType } from '../types/domain-event' ;
88import { v4 } from 'uuid' ;
99import { UUID } from 'io-ts-types' ;
10- import { DateTime } from 'luxon' ;
11- import { EpochTimestampMilliseconds } from '../read-models/shared-state/return-types' ;
1210import { GoogleSheetMetadata } from './extract-metadata' ;
1311import { GoogleSpreadsheetDataForSheet } from '../init-dependencies/google/pull_sheet_data' ;
1412import { lookup } from 'fp-ts/ReadonlyArray' ;
1513import { array } from 'fp-ts' ;
16-
// Sanity limits: parsed values outside these ranges are treated as broken parsing.

// Member number limits.
const MIN_RECOGNISED_MEMBER_NUMBER = 0;
const MAX_RECOGNISED_MEMBER_NUMBER = 10_000;

// Score limits (applied to both the achieved score and the maximum score).
const MIN_RECOGNISED_SCORE = 0;
const MAX_RECOGNISED_SCORE = 10_000;

// Early 2019 (2019-01-01T01:01:01Z) as epoch milliseconds; no training results
// have been seen from before this point.
const MIN_VALID_TIMESTAMP_EPOCH_MS =
  1_546_304_461_000 as EpochTimestampMilliseconds;
14+ import {
15+ extractEmail ,
16+ extractMemberNumber ,
17+ extractScore ,
18+ extractTimestamp ,
19+ } from './util' ;
20+ import { formatValidationErrors } from 'io-ts-reporters' ;
2621
2722const FORM_RESPONSES_SHEET_REGEX = / ^ F o r m R e s p o n s e s [ 0 - 9 ] * / i;
2823
/**
 * Timestamp layouts attempted, in order, when parsing a timestamp cell.
 *
 * Every slash-separated date layout is paired first with the zero-padded time
 * layout and then with the unpadded one; the ISO-like layout is tried last.
 */
const FORMATS_TO_TRY: string[] = (() => {
  const slashDateLayouts = [
    'dd/MM/yyyy',
    'MM/dd/yyyy',
    'M/dd/yyyy',
    'dd/M/yyyy',
    'M/d/yyyy',
    'd/M/yyyy',
  ];
  const withTime = (timeLayout: string): string[] =>
    slashDateLayouts.map(dateLayout => `${dateLayout} ${timeLayout}`);

  return [
    ...withTime('HH:mm:ss'),
    ...withTime('H:m:s'),
    'yyyy-MM-dd HH:mm:ss',
  ];
})();
46-
/**
 * Parses a score cell of the form `"<score> / <maxScore>"`.
 *
 * @param rowValue - Raw cell text; may be missing.
 * @returns `O.some` with the score, the maximum score and the percentage
 *   (rounded to the nearest integer), or `O.none` when the cell is empty, is
 *   not exactly two parts separated by `' / '`, either part does not parse to
 *   a number within the recognised score bounds, the score exceeds the
 *   maximum, or the maximum is zero.
 */
const extractScore = (
  rowValue: string | undefined | null
): O.Option<{
  score: number;
  maxScore: number;
  percentage: number;
}> => {
  if (!rowValue) {
    return O.none;
  }
  const parts = rowValue.split(' / ');
  if (parts.length !== 2) {
    return O.none;
  }

  const score = parseInt(parts[0], 10);
  if (
    isNaN(score) ||
    score < MIN_RECOGNISED_SCORE ||
    score > MAX_RECOGNISED_SCORE
  ) {
    return O.none;
  }

  const maxScore = parseInt(parts[1], 10);
  if (
    isNaN(maxScore) ||
    maxScore < MIN_RECOGNISED_SCORE ||
    maxScore > MAX_RECOGNISED_SCORE ||
    maxScore < score ||
    // A zero maximum would make the percentage 0 / 0 = NaN.
    maxScore === 0
  ) {
    return O.none;
  }

  const percentage = Math.round((score / maxScore) * 100);

  return O.some({
    score,
    maxScore,
    percentage,
  });
};
89-
/**
 * Extracts an email address from a cell, with surrounding whitespace removed.
 *
 * @param rowValue - Raw cell text; may be missing.
 * @returns `O.some` with the trimmed text, or `O.none` when the cell is
 *   missing, empty, or contains only whitespace.
 */
const extractEmail = (
  rowValue: string | undefined | null
): O.Option<string> => {
  // Trim before the emptiness check so a whitespace-only cell is not reported
  // as an (empty) email address.
  const trimmed = rowValue?.trim();
  if (!trimmed) {
    return O.none;
  }
  // We may want to add further normalisation to user emails such as making them
  // all lowercase (when used as a id) to prevent user confusion.
  return O.some(trimmed);
};
100-
/**
 * Extracts a member number from a cell value.
 *
 * String input is trimmed and parsed as a base-10 integer; numeric input is
 * used as given.
 *
 * @param rowValue - Raw cell value; may be missing.
 * @returns `O.some` with the number when it is greater than
 *   `MIN_RECOGNISED_MEMBER_NUMBER` and at most `MAX_RECOGNISED_MEMBER_NUMBER`,
 *   otherwise `O.none` (also for missing, empty, zero or unparseable input).
 */
const extractMemberNumber = (
  rowValue: string | number | undefined | null
): O.Option<number> => {
  if (!rowValue) {
    return O.none;
  }

  const memberNumber =
    typeof rowValue === 'string' ? parseInt(rowValue.trim(), 10) : rowValue;

  const withinBounds =
    !isNaN(memberNumber) &&
    memberNumber > MIN_RECOGNISED_MEMBER_NUMBER &&
    memberNumber <= MAX_RECOGNISED_MEMBER_NUMBER;

  return withinBounds ? O.some(memberNumber) : O.none;
};
121-
/**
 * Converts a parsed `DateTime` into epoch milliseconds and checks that it
 * falls in the accepted range.
 *
 * @param raw - The original cell text, used only in error messages.
 * @param timezone - The timezone the text was parsed in, used only in error messages.
 * @param ts - The parse result to validate.
 * @returns `E.right` with the timestamp in epoch milliseconds (whole-second
 *   resolution), or `E.left` with a description when `ts` is invalid, the
 *   conversion throws, or the result is earlier than
 *   `MIN_VALID_TIMESTAMP_EPOCH_MS` or more than ten minutes in the future.
 */
const timestampValid = (
  raw: string,
  timezone: string,
  ts: DateTime
): E.Either<string, EpochTimestampMilliseconds> => {
  // Allowance for timestamps slightly ahead of this machine's clock.
  const maxFutureSkewSeconds = 10 * 60;

  let timestampEpochMS;
  try {
    if (ts.isValid) {
      timestampEpochMS = (ts.toUnixInteger() *
        1000) as EpochTimestampMilliseconds;
    } else {
      return E.left(
        `Failed to parse timestamp: ${raw} in timezone ${timezone}, reason: ${ts.invalidReason}`
      );
    }
  } catch (e) {
    let errStr = 'unknown';
    if (e instanceof Error) {
      errStr = `${e.name}: ${e.message}`;
    }
    return E.left(
      `Unable to parse timestamp: '${raw}' in timezone ${timezone}, err: ${errStr}`
    );
  }

  // Add the allowance to "now" and then convert to milliseconds. Multiplying
  // "now" by the allowance would push the bound far into the future and let
  // every future timestamp through.
  const latestAcceptedEpochMS =
    (DateTime.utc().toUnixInteger() + maxFutureSkewSeconds) * 1000;

  if (
    isNaN(timestampEpochMS) ||
    !isFinite(timestampEpochMS) ||
    timestampEpochMS < MIN_VALID_TIMESTAMP_EPOCH_MS ||
    timestampEpochMS > latestAcceptedEpochMS
  ) {
    return E.left(
      `Produced timestamp is invalid/out-of-range: '${raw}', timezone: '${timezone}' decoded to ${timestampEpochMS}`
    );
  }
  return E.right(timestampEpochMS);
};
158-
/**
 * Builds a parser that turns a timestamp cell into epoch milliseconds.
 *
 * The returned function tries each entry of `FORMATS_TO_TRY` in order,
 * parsing in the given timezone, and returns the first result accepted by
 * `timestampValid`.
 *
 * @param timezone - Zone passed to the parser for every attempt.
 * @returns A function from the optional cell text to `E.right` with the
 *   timestamp, or `E.left` with `'Missing column value'` when there is no
 *   text, or with the failure message of the last format attempted.
 */
export const extractTimestamp =
  (timezone: string) =>
  (
    rowValue: O.Option<string>
  ): E.Either<string, EpochTimestampMilliseconds> => {
    if (!rowValue || O.isNone(rowValue)) {
      return E.left('Missing column value');
    }

    const raw = rowValue.value;
    let lastAttempt: E.Either<string, EpochTimestampMilliseconds> | undefined;
    for (const format of FORMATS_TO_TRY) {
      const parsed = DateTime.fromFormat(raw, format, {
        setZone: true,
        zone: timezone,
      });
      lastAttempt = timestampValid(raw, timezone, parsed);
      if (E.isRight(lastAttempt)) {
        return lastAttempt;
      }
    }
    // Every format failed; report the failure from the final attempt.
    return lastAttempt as E.Left<string>;
  };
180-
18124const extractFromRow =
18225 (
18326 logger : Logger ,
@@ -222,10 +65,10 @@ const extractFromRow =
22265 O . flatten
22366 ) ;
22467 const timestampEpochMS = pipe (
225- row . values ,
226- lookup ( metadata . mappedColumns . timestamp ) ,
68+ lookup ( metadata . mappedColumns . timestamp ) ( row . values ) ,
22769 O . map ( entry => entry . formattedValue ) ,
228- extractTimestamp ( timezone )
70+ O . getOrElse < string | null > ( ( ) => null ) ,
71+ extractTimestamp ( timezone ) . decode
22972 ) ;
23073
23174 if ( O . isNone ( email ) && O . isNone ( memberNumber ) ) {
@@ -244,7 +87,7 @@ const extractFromRow =
24487 if ( E . isLeft ( timestampEpochMS ) ) {
24588 logger . warn (
24689 'Failed to extract timestamp from row, skipped row, reason: %s' ,
247- timestampEpochMS . left
90+ formatValidationErrors ( timestampEpochMS . left )
24891 ) ;
24992 return O . none ;
25093 }
0 commit comments