1
1
import { sortBy } from '@seedcompany/common' ;
2
2
import levenshtein from 'fastest-levenshtein' ;
3
- import { startCase , without } from 'lodash' ;
3
+ import { startCase } from 'lodash' ;
4
4
import { type Column } from '~/common/xlsx.util' ;
5
5
import { ProductStep as Step } from '../product/dto' ;
6
6
import { type PnpExtractionResult , PnpProblemType } from './extraction-result' ;
@@ -15,8 +15,8 @@ export function findStepColumns(
15
15
result ?: PnpExtractionResult ,
16
16
availableSteps : readonly Step [ ] = [ ...Step ] ,
17
17
) {
18
- const matchedColumns : Partial < Record < Step , Column > > = { } ;
19
- let remainingSteps = availableSteps ;
18
+ const matchedColumns = new Map < Step , Column > ( ) ;
19
+ const remainingSteps = new Set ( availableSteps ) ;
20
20
const possibleSteps = sheet . stepLabels
21
21
. walkRight ( )
22
22
. filter ( ( cell ) => ! ! cell . asString )
@@ -25,33 +25,41 @@ export function findStepColumns(
25
25
possibleSteps . forEach ( ( { label, column, cell } , index ) => {
26
26
if ( index === possibleSteps . length - 1 ) {
27
27
// The last step should always be called Completed in CORD per Seth.
28
- // Written PnP already match , but OBS calls it Record. This is mislabeled
29
- // depending on the methodology.
30
- matchedColumns [ Step . Completed ] = column ;
28
+ // Written PnP already matches , but OBS calls it Record.
29
+ // This is mislabeled depending on the methodology.
30
+ matchedColumns . set ( Step . Completed , column ) ;
31
31
return ;
32
32
}
33
- const distances = remainingSteps . map ( ( step ) => {
34
- const humanLabel = startCase ( step ) . replace ( ' And ' , ' & ' ) ;
35
- const distance = levenshtein . distance ( label , humanLabel ) ;
36
- return [ step , distance ] as const ;
37
- } ) ;
38
- // Pick the step that is the closest fuzzy match
39
- const chosen = sortBy (
40
- // 5 is too far ignore those
41
- distances . filter ( ( [ _ , distance ] ) => distance < 5 ) ,
42
- ( [ _ , distance ] ) => distance ,
43
- ) [ 0 ] ?. [ 0 ] ;
33
+
34
+ const chosen = chooseStep ( label , remainingSteps ) ;
44
35
if ( ! chosen ) {
45
36
result ?. addProblem ( NonStandardStep , cell , { label } ) ;
46
37
return ;
47
38
}
48
- matchedColumns [ chosen ] = column ;
49
-
50
- remainingSteps = without ( remainingSteps , chosen ) ;
39
+ matchedColumns . set ( chosen , column ) ;
40
+ remainingSteps . delete ( chosen ) ;
51
41
} ) ;
52
- return matchedColumns as Record < Step , Column > ;
42
+ return matchedColumns as ReadonlyMap < Step , Column > ;
53
43
}
54
44
45
/**
 * Picks the step whose human-readable name is the closest fuzzy match
 * for a column label.
 *
 * Each available step is converted to a heading-style name (lodash
 * `startCase`, with the first ' And ' swapped for ' & ') and compared to
 * the label by Levenshtein edit distance.
 *
 * @param label The column label text to match.
 * @param available The steps that are still eligible to be chosen.
 * @returns The eligible step with the smallest edit distance, or
 *   `undefined` when every step is 5 or more edits away.
 */
const chooseStep = (
  label: string,
  available: ReadonlySet<Step>,
): Step | undefined => {
  // An edit distance of 5 or more is too far to be considered a match.
  const tooFar = 5;

  const candidates: Array<{ step: Step; distance: number }> = [];
  for (const step of available) {
    const heading = startCase(step).replace(' And ', ' & ');
    const distance = levenshtein.distance(label, heading);
    if (distance < tooFar) {
      candidates.push({ step, distance });
    }
  }

  // Closest match first; nothing survives the cutoff => undefined.
  const closest = sortBy(candidates, (candidate) => candidate.distance).at(0);
  return closest?.step;
};
62
+
55
63
const NonStandardStep = PnpProblemType . register ( {
56
64
name : 'NonStandardStep' ,
57
65
severity : 'Error' ,
0 commit comments