1
- import { sortBy } from '@seedcompany/common' ;
1
+ import { mapOf , sortBy } from '@seedcompany/common' ;
2
2
import levenshtein from 'fastest-levenshtein' ;
3
- import { startCase , without } from 'lodash' ;
3
+ import { startCase } from 'lodash' ;
4
4
import { type Column } from '~/common/xlsx.util' ;
5
5
import { ProductStep as Step } from '../product/dto' ;
6
6
import { type PnpExtractionResult , PnpProblemType } from './extraction-result' ;
7
7
import { type PlanningSheet } from './planning-sheet' ;
8
8
import { type ProgressSheet } from './progress-sheet' ;
9
9
10
/**
 * Non-standard column labels that are accepted as alternate names for a step.
 *
 * Keys are lowercase because `chooseStep` lowercases the sheet label before
 * looking it up here.
 */
const ApprovedAliases = (() => {
  // Labels grouped by the step they stand for.
  const labelsByStep: ReadonlyArray<readonly [Step, readonly string[]]> = [
    [
      Step.ExegesisAndFirstDraft,
      [
        'draft & keyboard',
        'first draft',
        'exegesis, 1st draft, keyboard',
        'internalization & first draft',
        'exegesis 1st draft & keybrd',
        'first draft & keyboard',
        'exegesis, 1st draft. keyboard',
      ],
    ],
    [
      Step.TeamCheck,
      [
        'team check & 1st testing',
        'team check & revision',
        'team check & 1st test',
      ],
    ],
    [
      Step.CommunityTesting,
      [
        'field test',
        'community check',
        'community review',
        'community testing & revision',
      ],
    ],
  ];

  // Flatten the groups into [label, step] entries, one per alias.
  return mapOf<string, Step>(
    labelsByStep.flatMap(([step, labels]) =>
      labels.map((label): [string, Step] => [label, step]),
    ),
  );
})();
26
+
10
27
/**
11
28
* Fuzzy match available steps to their column address.
12
29
*/
@@ -15,43 +32,60 @@ export function findStepColumns(
15
32
result ?: PnpExtractionResult ,
16
33
availableSteps : readonly Step [ ] = [ ...Step ] ,
17
34
) {
18
- const matchedColumns : Partial < Record < Step , Column > > = { } ;
19
- let remainingSteps = availableSteps ;
35
+ const matchedColumns = new Map < Step , Column > ( ) ;
36
+ const remainingSteps = new Set ( availableSteps ) ;
20
37
const possibleSteps = sheet . stepLabels
21
38
. walkRight ( )
22
39
. filter ( ( cell ) => ! ! cell . asString )
23
- . map ( ( cell ) => ( { label : cell . asString ! , column : cell . column , cell } ) )
40
+ . map ( ( cell ) => ( {
41
+ label : cell . asString ! . trim ( ) ,
42
+ column : cell . column ,
43
+ cell,
44
+ } ) )
24
45
. toArray ( ) ;
25
46
possibleSteps . forEach ( ( { label, column, cell } , index ) => {
26
47
if ( index === possibleSteps . length - 1 ) {
27
48
// The last step should always be called Completed in CORD per Seth.
28
- // Written PnP already match , but OBS calls it Record. This is mislabeled
29
- // depending on the methodology.
30
- matchedColumns [ Step . Completed ] = column ;
49
+ // Written PnP already matches , but OBS calls it Record.
50
+ // This is mislabeled depending on the methodology.
51
+ matchedColumns . set ( Step . Completed , column ) ;
31
52
return ;
32
53
}
33
- const distances = remainingSteps . map ( ( step ) => {
34
- const humanLabel = startCase ( step ) . replace ( ' And ' , ' & ' ) ;
35
- const distance = levenshtein . distance ( label , humanLabel ) ;
36
- return [ step , distance ] as const ;
37
- } ) ;
38
- // Pick the step that is the closest fuzzy match
39
- const chosen = sortBy (
40
- // 5 is too far ignore those
41
- distances . filter ( ( [ _ , distance ] ) => distance < 5 ) ,
42
- ( [ _ , distance ] ) => distance ,
43
- ) [ 0 ] ?. [ 0 ] ;
54
+
55
+ const chosen = chooseStep ( label , remainingSteps ) ;
44
56
if ( ! chosen ) {
45
57
result ?. addProblem ( NonStandardStep , cell , { label } ) ;
46
58
return ;
47
59
}
48
- matchedColumns [ chosen ] = column ;
49
-
50
- remainingSteps = without ( remainingSteps , chosen ) ;
60
+ matchedColumns . set ( chosen , column ) ;
61
+ remainingSteps . delete ( chosen ) ;
51
62
} ) ;
52
- return matchedColumns as Record < Step , Column > ;
63
+ return matchedColumns as ReadonlyMap < Step , Column > ;
53
64
}
54
65
66
/**
 * Pick the step that a sheet column label refers to.
 *
 * An approved alias (looked up by the lowercased label) takes precedence:
 * its step is returned if it is still available, otherwise nothing is
 * returned and no fuzzy matching is attempted.
 * Without an alias, the label is compared (as given, not lowercased) to the
 * human-readable name of each available step, and the closest one is used.
 *
 * @param label the column label from the sheet
 * @param available the steps that can still be chosen
 * @returns the chosen step, or undefined if nothing is close enough
 */
const chooseStep = (
  label: string,
  available: ReadonlySet<Step>,
): Step | undefined => {
  const aliased = ApprovedAliases.get(label.toLowerCase());
  if (aliased) {
    if (!available.has(aliased)) {
      return undefined;
    }
    return aliased;
  }

  // Score every available step by its edit distance to the label.
  const candidates = Array.from(available, (step) => ({
    step,
    distance: levenshtein.distance(
      label,
      startCase(step).replace(' And ', ' & '),
    ),
  }));
  // A distance of 5 or more is too far off; ignore those.
  const closeEnough = candidates.filter((candidate) => candidate.distance < 5);
  // Pick the step that is the closest fuzzy match.
  const closest = sortBy(closeEnough, (candidate) => candidate.distance).at(0);
  return closest?.step;
};
88
+
55
89
const NonStandardStep = PnpProblemType . register ( {
56
90
name : 'NonStandardStep' ,
57
91
severity : 'Error' ,
0 commit comments